| 1 | /**************************************************************************** |
| 2 | * Copyright (C) 2012-2015 Woboq GmbH |
| 3 | * Olivier Goffart <contact at woboq.com> |
| 4 | * https://woboq.com/codebrowser.html |
| 5 | * |
| 6 | * This file is part of the Woboq Code Browser. |
| 7 | * |
| 8 | * Commercial License Usage: |
| 9 | * Licensees holding valid commercial licenses provided by Woboq may use |
| 10 | * this file in accordance with the terms contained in a written agreement |
| 11 | * between the licensee and Woboq. |
| 12 | * For further information see https://woboq.com/codebrowser.html |
| 13 | * |
| 14 | * Alternatively, this work may be used under a Creative Commons |
| 15 | * Attribution-NonCommercial-ShareAlike 3.0 (CC-BY-NC-SA 3.0) License. |
| 16 | * http://creativecommons.org/licenses/by-nc-sa/3.0/deed.en_US |
| 17 | * This license does not allow you to use the code browser to assist the |
| 18 | * development of your commercial software. If you intent to do so, consider |
| 19 | * purchasing a commercial licence. |
| 20 | ****************************************************************************/ |
| 21 | |
| 22 | |
| 23 | #pragma once |
| 24 | |
| 25 | #include <utility> |
| 26 | #include <vector> |
| 27 | #include <string> |
| 28 | |
| 29 | |
/// Describes one file whose bytes are compiled into the program.
///
/// The struct is a non-owning view: it stores the two pointers it is given
/// and never copies or frees what they point to, so the referenced storage
/// must stay valid for as long as the EmbeddedFile is used.
struct EmbeddedFile {
    /// Name the file is stored under; nullptr in a default-constructed entry.
    const char *filename;
    /// First byte of the file data; nullptr in a default-constructed entry.
    const char *content;
    /// Byte count of `content`: the array bound minus one, i.e. without the
    /// terminating NUL when the data is a string literal.
    size_t size;

    /// Builds an empty entry: both pointers are null and the size is zero.
    constexpr EmbeddedFile()
        : filename(nullptr),
          content(nullptr),
          size(0) {}

    /// Builds an entry for `filename` backed by the character array `data`.
    ///
    /// The array bound N is deduced from the argument, which is what lets a
    /// string literal be passed directly. The last array element is not
    /// counted, so `size` becomes N - 1.
    template <int N>
    constexpr EmbeddedFile(const char *filename, const char (&data)[N])
        : filename(filename),
          content(data),
          size(N - 1) {}
};
| 39 | |
| 40 | static constexpr EmbeddedFile EmbeddedFiles[] = { |
| 41 | { "/builtins/__clang_cuda_builtin_vars.h" , "/*===---- cuda_builtin_vars.h - CUDA built-in variables ---------------------===\n" |
| 42 | " *\n" |
| 43 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 44 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 45 | " * in the Software without restriction, including without limitation the rights\n" |
| 46 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 47 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 48 | " * furnished to do so, subject to the following conditions:\n" |
| 49 | " *\n" |
| 50 | " * The above copyright notice and this permission notice shall be included in\n" |
| 51 | " * all copies or substantial portions of the Software.\n" |
| 52 | " *\n" |
| 53 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 54 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 55 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 56 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 57 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 58 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 59 | " * THE SOFTWARE.\n" |
| 60 | " *\n" |
| 61 | " *===-----------------------------------------------------------------------===\n" |
| 62 | " */\n" |
| 63 | "\n" |
| 64 | "#ifndef __CUDA_BUILTIN_VARS_H\n" |
| 65 | "#define __CUDA_BUILTIN_VARS_H\n" |
| 66 | "\n" |
| 67 | "// Forward declares from vector_types.h.\n" |
| 68 | "struct uint3;\n" |
| 69 | "struct dim3;\n" |
| 70 | "\n" |
| 71 | "// The file implements built-in CUDA variables using __declspec(property).\n" |
| 72 | "// https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx\n" |
| 73 | "// All read accesses of built-in variable fields get converted into calls to a\n" |
| 74 | "// getter function which in turn calls the appropriate builtin to fetch the\n" |
| 75 | "// value.\n" |
| 76 | "//\n" |
| 77 | "// Example:\n" |
| 78 | "// int x = threadIdx.x;\n" |
| 79 | "// IR output:\n" |
| 80 | "// %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3\n" |
| 81 | "// PTX output:\n" |
| 82 | "// mov.u32 %r2, %tid.x;\n" |
| 83 | "\n" |
| 84 | "#define __CUDA_DEVICE_BUILTIN(FIELD, INTRINSIC) \\\n" |
| 85 | " __declspec(property(get = __fetch_builtin_##FIELD)) unsigned int FIELD; \\\n" |
| 86 | " static inline __attribute__((always_inline)) \\\n" |
| 87 | " __attribute__((device)) unsigned int __fetch_builtin_##FIELD(void) { \\\n" |
| 88 | " return INTRINSIC; \\\n" |
| 89 | " }\n" |
| 90 | "\n" |
| 91 | "#if __cplusplus >= 201103L\n" |
| 92 | "#define __DELETE =delete\n" |
| 93 | "#else\n" |
| 94 | "#define __DELETE\n" |
| 95 | "#endif\n" |
| 96 | "\n" |
| 97 | "// Make sure nobody can create instances of the special variable types. nvcc\n" |
| 98 | "// also disallows taking address of special variables, so we disable address-of\n" |
| 99 | "// operator as well.\n" |
| 100 | "#define __CUDA_DISALLOW_BUILTINVAR_ACCESS(TypeName) \\\n" |
| 101 | " __attribute__((device)) TypeName() __DELETE; \\\n" |
| 102 | " __attribute__((device)) TypeName(const TypeName &) __DELETE; \\\n" |
| 103 | " __attribute__((device)) void operator=(const TypeName &) const __DELETE; \\\n" |
| 104 | " __attribute__((device)) TypeName *operator&() const __DELETE\n" |
| 105 | "\n" |
| 106 | "struct __cuda_builtin_threadIdx_t {\n" |
| 107 | " __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_tid_x());\n" |
| 108 | " __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_tid_y());\n" |
| 109 | " __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z());\n" |
| 110 | " // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a\n" |
| 111 | " // uint3). This function is defined after we pull in vector_types.h.\n" |
| 112 | " __attribute__((device)) operator uint3() const;\n" |
| 113 | "private:\n" |
| 114 | " __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t);\n" |
| 115 | "};\n" |
| 116 | "\n" |
| 117 | "struct __cuda_builtin_blockIdx_t {\n" |
| 118 | " __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ctaid_x());\n" |
| 119 | " __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ctaid_y());\n" |
| 120 | " __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z());\n" |
| 121 | " // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a\n" |
| 122 | " // uint3). This function is defined after we pull in vector_types.h.\n" |
| 123 | " __attribute__((device)) operator uint3() const;\n" |
| 124 | "private:\n" |
| 125 | " __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t);\n" |
| 126 | "};\n" |
| 127 | "\n" |
| 128 | "struct __cuda_builtin_blockDim_t {\n" |
| 129 | " __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ntid_x());\n" |
| 130 | " __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ntid_y());\n" |
| 131 | " __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ntid_z());\n" |
| 132 | " // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a\n" |
| 133 | " // dim3). This function is defined after we pull in vector_types.h.\n" |
| 134 | " __attribute__((device)) operator dim3() const;\n" |
| 135 | "private:\n" |
| 136 | " __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t);\n" |
| 137 | "};\n" |
| 138 | "\n" |
| 139 | "struct __cuda_builtin_gridDim_t {\n" |
| 140 | " __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_nctaid_x());\n" |
| 141 | " __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_nctaid_y());\n" |
| 142 | " __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_nctaid_z());\n" |
| 143 | " // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a\n" |
| 144 | " // dim3). This function is defined after we pull in vector_types.h.\n" |
| 145 | " __attribute__((device)) operator dim3() const;\n" |
| 146 | "private:\n" |
| 147 | " __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t);\n" |
| 148 | "};\n" |
| 149 | "\n" |
| 150 | "#define __CUDA_BUILTIN_VAR \\\n" |
| 151 | " extern const __attribute__((device)) __attribute__((weak))\n" |
| 152 | "__CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;\n" |
| 153 | "__CUDA_BUILTIN_VAR __cuda_builtin_blockIdx_t blockIdx;\n" |
| 154 | "__CUDA_BUILTIN_VAR __cuda_builtin_blockDim_t blockDim;\n" |
| 155 | "__CUDA_BUILTIN_VAR __cuda_builtin_gridDim_t gridDim;\n" |
| 156 | "\n" |
| 157 | "// warpSize should translate to read of %WARP_SZ but there's currently no\n" |
| 158 | "// builtin to do so. According to PTX v4.2 docs 'to date, all target\n" |
| 159 | "// architectures have a WARP_SZ value of 32'.\n" |
| 160 | "__attribute__((device)) const int warpSize = 32;\n" |
| 161 | "\n" |
| 162 | "#undef __CUDA_DEVICE_BUILTIN\n" |
| 163 | "#undef __CUDA_BUILTIN_VAR\n" |
| 164 | "#undef __CUDA_DISALLOW_BUILTINVAR_ACCESS\n" |
| 165 | "\n" |
| 166 | "#endif /* __CUDA_BUILTIN_VARS_H */\n" |
| 167 | "" } , |
| 168 | { "/builtins/__clang_cuda_cmath.h" , "/*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------===\n" |
| 169 | " *\n" |
| 170 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 171 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 172 | " * in the Software without restriction, including without limitation the rights\n" |
| 173 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 174 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 175 | " * furnished to do so, subject to the following conditions:\n" |
| 176 | " *\n" |
| 177 | " * The above copyright notice and this permission notice shall be included in\n" |
| 178 | " * all copies or substantial portions of the Software.\n" |
| 179 | " *\n" |
| 180 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 181 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 182 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 183 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 184 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 185 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 186 | " * THE SOFTWARE.\n" |
| 187 | " *\n" |
| 188 | " *===-----------------------------------------------------------------------===\n" |
| 189 | " */\n" |
| 190 | "#ifndef __CLANG_CUDA_CMATH_H__\n" |
| 191 | "#define __CLANG_CUDA_CMATH_H__\n" |
| 192 | "#ifndef __CUDA__\n" |
| 193 | "#error \"This file is for CUDA compilation only.\"\n" |
| 194 | "#endif\n" |
| 195 | "\n" |
| 196 | "#include <limits>\n" |
| 197 | "\n" |
| 198 | "// CUDA lets us use various std math functions on the device side. This file\n" |
| 199 | "// works in concert with __clang_cuda_math_forward_declares.h to make this work.\n" |
| 200 | "//\n" |
| 201 | "// Specifically, the forward-declares header declares __device__ overloads for\n" |
| 202 | "// these functions in the global namespace, then pulls them into namespace std\n" |
| 203 | "// with 'using' statements. Then this file implements those functions, after\n" |
| 204 | "// their implementations have been pulled in.\n" |
| 205 | "//\n" |
| 206 | "// It's important that we declare the functions in the global namespace and pull\n" |
| 207 | "// them into namespace std with using statements, as opposed to simply declaring\n" |
| 208 | "// these functions in namespace std, because our device functions need to\n" |
| 209 | "// overload the standard library functions, which may be declared in the global\n" |
| 210 | "// namespace or in std, depending on the degree of conformance of the stdlib\n" |
| 211 | "// implementation. Declaring in the global namespace and pulling into namespace\n" |
| 212 | "// std covers all of the known knowns.\n" |
| 213 | "\n" |
| 214 | "#define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))\n" |
| 215 | "\n" |
| 216 | "__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }\n" |
| 217 | "__DEVICE__ long abs(long __n) { return ::labs(__n); }\n" |
| 218 | "__DEVICE__ float abs(float __x) { return ::fabsf(__x); }\n" |
| 219 | "__DEVICE__ double abs(double __x) { return ::fabs(__x); }\n" |
| 220 | "__DEVICE__ float acos(float __x) { return ::acosf(__x); }\n" |
| 221 | "__DEVICE__ float asin(float __x) { return ::asinf(__x); }\n" |
| 222 | "__DEVICE__ float atan(float __x) { return ::atanf(__x); }\n" |
| 223 | "__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }\n" |
| 224 | "__DEVICE__ float ceil(float __x) { return ::ceilf(__x); }\n" |
| 225 | "__DEVICE__ float cos(float __x) { return ::cosf(__x); }\n" |
| 226 | "__DEVICE__ float cosh(float __x) { return ::coshf(__x); }\n" |
| 227 | "__DEVICE__ float exp(float __x) { return ::expf(__x); }\n" |
| 228 | "__DEVICE__ float fabs(float __x) { return ::fabsf(__x); }\n" |
| 229 | "__DEVICE__ float floor(float __x) { return ::floorf(__x); }\n" |
| 230 | "__DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }\n" |
| 231 | "__DEVICE__ int fpclassify(float __x) {\n" |
| 232 | " return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,\n" |
| 233 | " FP_ZERO, __x);\n" |
| 234 | "}\n" |
| 235 | "__DEVICE__ int fpclassify(double __x) {\n" |
| 236 | " return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,\n" |
| 237 | " FP_ZERO, __x);\n" |
| 238 | "}\n" |
| 239 | "__DEVICE__ float frexp(float __arg, int *__exp) {\n" |
| 240 | " return ::frexpf(__arg, __exp);\n" |
| 241 | "}\n" |
| 242 | "\n" |
| 243 | "// For inscrutable reasons, the CUDA headers define these functions for us on\n" |
| 244 | "// Windows.\n" |
| 245 | "#ifndef _MSC_VER\n" |
| 246 | "__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }\n" |
| 247 | "__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }\n" |
| 248 | "__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }\n" |
| 249 | "// For inscrutable reasons, __finite(), the double-precision version of\n" |
| 250 | "// __finitef, does not exist when compiling for MacOS. __isfinited is available\n" |
| 251 | "// everywhere and is just as good.\n" |
| 252 | "__DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }\n" |
| 253 | "__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }\n" |
| 254 | "__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }\n" |
| 255 | "#endif\n" |
| 256 | "\n" |
| 257 | "__DEVICE__ bool isgreater(float __x, float __y) {\n" |
| 258 | " return __builtin_isgreater(__x, __y);\n" |
| 259 | "}\n" |
| 260 | "__DEVICE__ bool isgreater(double __x, double __y) {\n" |
| 261 | " return __builtin_isgreater(__x, __y);\n" |
| 262 | "}\n" |
| 263 | "__DEVICE__ bool isgreaterequal(float __x, float __y) {\n" |
| 264 | " return __builtin_isgreaterequal(__x, __y);\n" |
| 265 | "}\n" |
| 266 | "__DEVICE__ bool isgreaterequal(double __x, double __y) {\n" |
| 267 | " return __builtin_isgreaterequal(__x, __y);\n" |
| 268 | "}\n" |
| 269 | "__DEVICE__ bool isless(float __x, float __y) {\n" |
| 270 | " return __builtin_isless(__x, __y);\n" |
| 271 | "}\n" |
| 272 | "__DEVICE__ bool isless(double __x, double __y) {\n" |
| 273 | " return __builtin_isless(__x, __y);\n" |
| 274 | "}\n" |
| 275 | "__DEVICE__ bool islessequal(float __x, float __y) {\n" |
| 276 | " return __builtin_islessequal(__x, __y);\n" |
| 277 | "}\n" |
| 278 | "__DEVICE__ bool islessequal(double __x, double __y) {\n" |
| 279 | " return __builtin_islessequal(__x, __y);\n" |
| 280 | "}\n" |
| 281 | "__DEVICE__ bool islessgreater(float __x, float __y) {\n" |
| 282 | " return __builtin_islessgreater(__x, __y);\n" |
| 283 | "}\n" |
| 284 | "__DEVICE__ bool islessgreater(double __x, double __y) {\n" |
| 285 | " return __builtin_islessgreater(__x, __y);\n" |
| 286 | "}\n" |
| 287 | "__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }\n" |
| 288 | "__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }\n" |
| 289 | "__DEVICE__ bool isunordered(float __x, float __y) {\n" |
| 290 | " return __builtin_isunordered(__x, __y);\n" |
| 291 | "}\n" |
| 292 | "__DEVICE__ bool isunordered(double __x, double __y) {\n" |
| 293 | " return __builtin_isunordered(__x, __y);\n" |
| 294 | "}\n" |
| 295 | "__DEVICE__ float ldexp(float __arg, int __exp) {\n" |
| 296 | " return ::ldexpf(__arg, __exp);\n" |
| 297 | "}\n" |
| 298 | "__DEVICE__ float log(float __x) { return ::logf(__x); }\n" |
| 299 | "__DEVICE__ float log10(float __x) { return ::log10f(__x); }\n" |
| 300 | "__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }\n" |
| 301 | "__DEVICE__ float pow(float __base, float __exp) {\n" |
| 302 | " return ::powf(__base, __exp);\n" |
| 303 | "}\n" |
| 304 | "__DEVICE__ float pow(float __base, int __iexp) {\n" |
| 305 | " return ::powif(__base, __iexp);\n" |
| 306 | "}\n" |
| 307 | "__DEVICE__ double pow(double __base, int __iexp) {\n" |
| 308 | " return ::powi(__base, __iexp);\n" |
| 309 | "}\n" |
| 310 | "__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }\n" |
| 311 | "__DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); }\n" |
| 312 | "__DEVICE__ float sin(float __x) { return ::sinf(__x); }\n" |
| 313 | "__DEVICE__ float sinh(float __x) { return ::sinhf(__x); }\n" |
| 314 | "__DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }\n" |
| 315 | "__DEVICE__ float tan(float __x) { return ::tanf(__x); }\n" |
| 316 | "__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }\n" |
| 317 | "\n" |
| 318 | "// Notably missing above is nexttoward. We omit it because\n" |
| 319 | "// libdevice doesn't provide an implementation, and we don't want to be in the\n" |
| 320 | "// business of implementing tricky libm functions in this header.\n" |
| 321 | "\n" |
| 322 | "// Now we've defined everything we promised we'd define in\n" |
| 323 | "// __clang_cuda_math_forward_declares.h. We need to do two additional things to\n" |
| 324 | "// fix up our math functions.\n" |
| 325 | "//\n" |
| 326 | "// 1) Define __device__ overloads for e.g. sin(int). The CUDA headers define\n" |
| 327 | "// only sin(float) and sin(double), which means that e.g. sin(0) is\n" |
| 328 | "// ambiguous.\n" |
| 329 | "//\n" |
| 330 | "// 2) Pull the __device__ overloads of \"foobarf\" math functions into namespace\n" |
| 331 | "// std. These are defined in the CUDA headers in the global namespace,\n" |
| 332 | "// independent of everything else we've done here.\n" |
| 333 | "\n" |
| 334 | "// We can't use std::enable_if, because we want to be pre-C++11 compatible. But\n" |
| 335 | "// we go ahead and unconditionally define functions that are only available when\n" |
| 336 | "// compiling for C++11 to match the behavior of the CUDA headers.\n" |
| 337 | "template<bool __B, class __T = void>\n" |
| 338 | "struct __clang_cuda_enable_if {};\n" |
| 339 | "\n" |
| 340 | "template <class __T> struct __clang_cuda_enable_if<true, __T> {\n" |
| 341 | " typedef __T type;\n" |
| 342 | "};\n" |
| 343 | "\n" |
| 344 | "// Defines an overload of __fn that accepts one integral argument, calls\n" |
| 345 | "// __fn((double)x), and returns __retty.\n" |
| 346 | "#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn) \\\n" |
| 347 | " template <typename __T> \\\n" |
| 348 | " __DEVICE__ \\\n" |
| 349 | " typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, \\\n" |
| 350 | " __retty>::type \\\n" |
| 351 | " __fn(__T __x) { \\\n" |
| 352 | " return ::__fn((double)__x); \\\n" |
| 353 | " }\n" |
| 354 | "\n" |
| 355 | "// Defines an overload of __fn that accepts one two arithmetic arguments, calls\n" |
| 356 | "// __fn((double)x, (double)y), and returns a double.\n" |
| 357 | "//\n" |
| 358 | "// Note this is different from OVERLOAD_1, which generates an overload that\n" |
| 359 | "// accepts only *integral* arguments.\n" |
| 360 | "#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn) \\\n" |
| 361 | " template <typename __T1, typename __T2> \\\n" |
| 362 | " __DEVICE__ typename __clang_cuda_enable_if< \\\n" |
| 363 | " std::numeric_limits<__T1>::is_specialized && \\\n" |
| 364 | " std::numeric_limits<__T2>::is_specialized, \\\n" |
| 365 | " __retty>::type \\\n" |
| 366 | " __fn(__T1 __x, __T2 __y) { \\\n" |
| 367 | " return __fn((double)__x, (double)__y); \\\n" |
| 368 | " }\n" |
| 369 | "\n" |
| 370 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos)\n" |
| 371 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh)\n" |
| 372 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin)\n" |
| 373 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh)\n" |
| 374 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan)\n" |
| 375 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2);\n" |
| 376 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh)\n" |
| 377 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt)\n" |
| 378 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil)\n" |
| 379 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign);\n" |
| 380 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos)\n" |
| 381 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh)\n" |
| 382 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf)\n" |
| 383 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc)\n" |
| 384 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp)\n" |
| 385 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2)\n" |
| 386 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1)\n" |
| 387 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs)\n" |
| 388 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim);\n" |
| 389 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor)\n" |
| 390 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax);\n" |
| 391 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin);\n" |
| 392 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod);\n" |
| 393 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify)\n" |
| 394 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot);\n" |
| 395 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb)\n" |
| 396 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite)\n" |
| 397 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater);\n" |
| 398 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal);\n" |
| 399 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf);\n" |
| 400 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless);\n" |
| 401 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal);\n" |
| 402 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater);\n" |
| 403 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan);\n" |
| 404 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal)\n" |
| 405 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered);\n" |
| 406 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma)\n" |
| 407 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log)\n" |
| 408 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10)\n" |
| 409 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p)\n" |
| 410 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2)\n" |
| 411 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb)\n" |
| 412 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint)\n" |
| 413 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround)\n" |
| 414 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint)\n" |
| 415 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround)\n" |
| 416 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint);\n" |
| 417 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter);\n" |
| 418 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow);\n" |
| 419 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder);\n" |
| 420 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint);\n" |
| 421 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round);\n" |
| 422 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit)\n" |
| 423 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin)\n" |
| 424 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh)\n" |
| 425 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt)\n" |
| 426 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan)\n" |
| 427 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh)\n" |
| 428 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma)\n" |
| 429 | "__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc);\n" |
| 430 | "\n" |
| 431 | "#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1\n" |
| 432 | "#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2\n" |
| 433 | "\n" |
| 434 | "// Overloads for functions that don't match the patterns expected by\n" |
| 435 | "// __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}.\n" |
| 436 | "template <typename __T1, typename __T2, typename __T3>\n" |
| 437 | "__DEVICE__ typename __clang_cuda_enable_if<\n" |
| 438 | " std::numeric_limits<__T1>::is_specialized &&\n" |
| 439 | " std::numeric_limits<__T2>::is_specialized &&\n" |
| 440 | " std::numeric_limits<__T3>::is_specialized,\n" |
| 441 | " double>::type\n" |
| 442 | "fma(__T1 __x, __T2 __y, __T3 __z) {\n" |
| 443 | " return std::fma((double)__x, (double)__y, (double)__z);\n" |
| 444 | "}\n" |
| 445 | "\n" |
| 446 | "template <typename __T>\n" |
| 447 | "__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,\n" |
| 448 | " double>::type\n" |
| 449 | "frexp(__T __x, int *__exp) {\n" |
| 450 | " return std::frexp((double)__x, __exp);\n" |
| 451 | "}\n" |
| 452 | "\n" |
| 453 | "template <typename __T>\n" |
| 454 | "__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,\n" |
| 455 | " double>::type\n" |
| 456 | "ldexp(__T __x, int __exp) {\n" |
| 457 | " return std::ldexp((double)__x, __exp);\n" |
| 458 | "}\n" |
| 459 | "\n" |
| 460 | "template <typename __T1, typename __T2>\n" |
| 461 | "__DEVICE__ typename __clang_cuda_enable_if<\n" |
| 462 | " std::numeric_limits<__T1>::is_specialized &&\n" |
| 463 | " std::numeric_limits<__T2>::is_specialized,\n" |
| 464 | " double>::type\n" |
| 465 | "remquo(__T1 __x, __T2 __y, int *__quo) {\n" |
| 466 | " return std::remquo((double)__x, (double)__y, __quo);\n" |
| 467 | "}\n" |
| 468 | "\n" |
| 469 | "template <typename __T>\n" |
| 470 | "__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,\n" |
| 471 | " double>::type\n" |
| 472 | "scalbln(__T __x, long __exp) {\n" |
| 473 | " return std::scalbln((double)__x, __exp);\n" |
| 474 | "}\n" |
| 475 | "\n" |
| 476 | "template <typename __T>\n" |
| 477 | "__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,\n" |
| 478 | " double>::type\n" |
| 479 | "scalbn(__T __x, int __exp) {\n" |
| 480 | " return std::scalbn((double)__x, __exp);\n" |
| 481 | "}\n" |
| 482 | "\n" |
| 483 | "// We need to define these overloads in exactly the namespace our standard\n" |
| 484 | "// library uses (including the right inline namespace), otherwise they won't be\n" |
| 485 | "// picked up by other functions in the standard library (e.g. functions in\n" |
| 486 | "// <complex>). Thus the ugliness below.\n" |
| 487 | "#ifdef _LIBCPP_BEGIN_NAMESPACE_STD\n" |
| 488 | "_LIBCPP_BEGIN_NAMESPACE_STD\n" |
| 489 | "#else\n" |
| 490 | "namespace std {\n" |
| 491 | "#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION\n" |
| 492 | "_GLIBCXX_BEGIN_NAMESPACE_VERSION\n" |
| 493 | "#endif\n" |
| 494 | "#endif\n" |
| 495 | "\n" |
| 496 | "// Pull the new overloads we defined above into namespace std.\n" |
| 497 | "using ::acos;\n" |
| 498 | "using ::acosh;\n" |
| 499 | "using ::asin;\n" |
| 500 | "using ::asinh;\n" |
| 501 | "using ::atan;\n" |
| 502 | "using ::atan2;\n" |
| 503 | "using ::atanh;\n" |
| 504 | "using ::cbrt;\n" |
| 505 | "using ::ceil;\n" |
| 506 | "using ::copysign;\n" |
| 507 | "using ::cos;\n" |
| 508 | "using ::cosh;\n" |
| 509 | "using ::erf;\n" |
| 510 | "using ::erfc;\n" |
| 511 | "using ::exp;\n" |
| 512 | "using ::exp2;\n" |
| 513 | "using ::expm1;\n" |
| 514 | "using ::fabs;\n" |
| 515 | "using ::fdim;\n" |
| 516 | "using ::floor;\n" |
| 517 | "using ::fma;\n" |
| 518 | "using ::fmax;\n" |
| 519 | "using ::fmin;\n" |
| 520 | "using ::fmod;\n" |
| 521 | "using ::fpclassify;\n" |
| 522 | "using ::frexp;\n" |
| 523 | "using ::hypot;\n" |
| 524 | "using ::ilogb;\n" |
| 525 | "using ::isfinite;\n" |
| 526 | "using ::isgreater;\n" |
| 527 | "using ::isgreaterequal;\n" |
| 528 | "using ::isless;\n" |
| 529 | "using ::islessequal;\n" |
| 530 | "using ::islessgreater;\n" |
| 531 | "using ::isnormal;\n" |
| 532 | "using ::isunordered;\n" |
| 533 | "using ::ldexp;\n" |
| 534 | "using ::lgamma;\n" |
| 535 | "using ::llrint;\n" |
| 536 | "using ::llround;\n" |
| 537 | "using ::log;\n" |
| 538 | "using ::log10;\n" |
| 539 | "using ::log1p;\n" |
| 540 | "using ::log2;\n" |
| 541 | "using ::logb;\n" |
| 542 | "using ::lrint;\n" |
| 543 | "using ::lround;\n" |
| 544 | "using ::nearbyint;\n" |
| 545 | "using ::nextafter;\n" |
| 546 | "using ::pow;\n" |
| 547 | "using ::remainder;\n" |
| 548 | "using ::remquo;\n" |
| 549 | "using ::rint;\n" |
| 550 | "using ::round;\n" |
| 551 | "using ::scalbln;\n" |
| 552 | "using ::scalbn;\n" |
| 553 | "using ::signbit;\n" |
| 554 | "using ::sin;\n" |
| 555 | "using ::sinh;\n" |
| 556 | "using ::sqrt;\n" |
| 557 | "using ::tan;\n" |
| 558 | "using ::tanh;\n" |
| 559 | "using ::tgamma;\n" |
| 560 | "using ::trunc;\n" |
| 561 | "\n" |
| 562 | "// Well this is fun: We need to pull these symbols in for libc++, but we can't\n" |
| 563 | "// pull them in with libstdc++, because its ::isinf and ::isnan are different\n" |
| 564 | "// than its std::isinf and std::isnan.\n" |
| 565 | "#ifndef __GLIBCXX__\n" |
| 566 | "using ::isinf;\n" |
| 567 | "using ::isnan;\n" |
| 568 | "#endif\n" |
| 569 | "\n" |
| 570 | "// Finally, pull the \"foobarf\" functions that CUDA defines in its headers into\n" |
| 571 | "// namespace std.\n" |
| 572 | "using ::acosf;\n" |
| 573 | "using ::acoshf;\n" |
| 574 | "using ::asinf;\n" |
| 575 | "using ::asinhf;\n" |
| 576 | "using ::atan2f;\n" |
| 577 | "using ::atanf;\n" |
| 578 | "using ::atanhf;\n" |
| 579 | "using ::cbrtf;\n" |
| 580 | "using ::ceilf;\n" |
| 581 | "using ::copysignf;\n" |
| 582 | "using ::cosf;\n" |
| 583 | "using ::coshf;\n" |
| 584 | "using ::erfcf;\n" |
| 585 | "using ::erff;\n" |
| 586 | "using ::exp2f;\n" |
| 587 | "using ::expf;\n" |
| 588 | "using ::expm1f;\n" |
| 589 | "using ::fabsf;\n" |
| 590 | "using ::fdimf;\n" |
| 591 | "using ::floorf;\n" |
| 592 | "using ::fmaf;\n" |
| 593 | "using ::fmaxf;\n" |
| 594 | "using ::fminf;\n" |
| 595 | "using ::fmodf;\n" |
| 596 | "using ::frexpf;\n" |
| 597 | "using ::hypotf;\n" |
| 598 | "using ::ilogbf;\n" |
| 599 | "using ::ldexpf;\n" |
| 600 | "using ::lgammaf;\n" |
| 601 | "using ::llrintf;\n" |
| 602 | "using ::llroundf;\n" |
| 603 | "using ::log10f;\n" |
| 604 | "using ::log1pf;\n" |
| 605 | "using ::log2f;\n" |
| 606 | "using ::logbf;\n" |
| 607 | "using ::logf;\n" |
| 608 | "using ::lrintf;\n" |
| 609 | "using ::lroundf;\n" |
| 610 | "using ::modff;\n" |
| 611 | "using ::nearbyintf;\n" |
| 612 | "using ::nextafterf;\n" |
| 613 | "using ::powf;\n" |
| 614 | "using ::remainderf;\n" |
| 615 | "using ::remquof;\n" |
| 616 | "using ::rintf;\n" |
| 617 | "using ::roundf;\n" |
| 618 | "using ::scalblnf;\n" |
| 619 | "using ::scalbnf;\n" |
| 620 | "using ::sinf;\n" |
| 621 | "using ::sinhf;\n" |
| 622 | "using ::sqrtf;\n" |
| 623 | "using ::tanf;\n" |
| 624 | "using ::tanhf;\n" |
| 625 | "using ::tgammaf;\n" |
| 626 | "using ::truncf;\n" |
| 627 | "\n" |
| 628 | "#ifdef _LIBCPP_END_NAMESPACE_STD\n" |
| 629 | "_LIBCPP_END_NAMESPACE_STD\n" |
| 630 | "#else\n" |
| 631 | "#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION\n" |
| 632 | "_GLIBCXX_END_NAMESPACE_VERSION\n" |
| 633 | "#endif\n" |
| 634 | "} // namespace std\n" |
| 635 | "#endif\n" |
| 636 | "\n" |
| 637 | "#undef __DEVICE__\n" |
| 638 | "\n" |
| 639 | "#endif\n" |
| 640 | "" } , |
| 641 | { "/builtins/__clang_cuda_complex_builtins.h" , "/*===-- __clang_cuda_complex_builtins - CUDA impls of runtime complex fns ---===\n" |
| 642 | " *\n" |
| 643 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 644 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 645 | " * in the Software without restriction, including without limitation the rights\n" |
| 646 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 647 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 648 | " * furnished to do so, subject to the following conditions:\n" |
| 649 | " *\n" |
| 650 | " * The above copyright notice and this permission notice shall be included in\n" |
| 651 | " * all copies or substantial portions of the Software.\n" |
| 652 | " *\n" |
| 653 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 654 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 655 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 656 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 657 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 658 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 659 | " * THE SOFTWARE.\n" |
| 660 | " *\n" |
| 661 | " *===-----------------------------------------------------------------------===\n" |
| 662 | " */\n" |
| 663 | "\n" |
| 664 | "#ifndef __CLANG_CUDA_COMPLEX_BUILTINS\n" |
| 665 | "#define __CLANG_CUDA_COMPLEX_BUILTINS\n" |
| 666 | "\n" |
| 667 | "// This header defines __muldc3, __mulsc3, __divdc3, and __divsc3. These are\n" |
| 668 | "// libgcc functions that clang assumes are available when compiling c99 complex\n" |
| 669 | "// operations. (These implementations come from libc++, and have been modified\n" |
| 670 | "// to work with CUDA.)\n" |
| 671 | "\n" |
| 672 | "extern \"C\" inline __device__ double _Complex __muldc3(double __a, double __b,\n" |
| 673 | " double __c, double __d) {\n" |
| 674 | " double __ac = __a * __c;\n" |
| 675 | " double __bd = __b * __d;\n" |
| 676 | " double __ad = __a * __d;\n" |
| 677 | " double __bc = __b * __c;\n" |
| 678 | " double _Complex z;\n" |
| 679 | " __real__(z) = __ac - __bd;\n" |
| 680 | " __imag__(z) = __ad + __bc;\n" |
| 681 | " if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {\n" |
| 682 | " int __recalc = 0;\n" |
| 683 | " if (std::isinf(__a) || std::isinf(__b)) {\n" |
| 684 | " __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);\n" |
| 685 | " __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);\n" |
| 686 | " if (std::isnan(__c))\n" |
| 687 | " __c = std::copysign(0, __c);\n" |
| 688 | " if (std::isnan(__d))\n" |
| 689 | " __d = std::copysign(0, __d);\n" |
| 690 | " __recalc = 1;\n" |
| 691 | " }\n" |
| 692 | " if (std::isinf(__c) || std::isinf(__d)) {\n" |
| 693 | " __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);\n" |
| 694 | " __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);\n" |
| 695 | " if (std::isnan(__a))\n" |
| 696 | " __a = std::copysign(0, __a);\n" |
| 697 | " if (std::isnan(__b))\n" |
| 698 | " __b = std::copysign(0, __b);\n" |
| 699 | " __recalc = 1;\n" |
| 700 | " }\n" |
| 701 | " if (!__recalc && (std::isinf(__ac) || std::isinf(__bd) ||\n" |
| 702 | " std::isinf(__ad) || std::isinf(__bc))) {\n" |
| 703 | " if (std::isnan(__a))\n" |
| 704 | " __a = std::copysign(0, __a);\n" |
| 705 | " if (std::isnan(__b))\n" |
| 706 | " __b = std::copysign(0, __b);\n" |
| 707 | " if (std::isnan(__c))\n" |
| 708 | " __c = std::copysign(0, __c);\n" |
| 709 | " if (std::isnan(__d))\n" |
| 710 | " __d = std::copysign(0, __d);\n" |
| 711 | " __recalc = 1;\n" |
| 712 | " }\n" |
| 713 | " if (__recalc) {\n" |
| 714 | " // Can't use std::numeric_limits<double>::infinity() -- that doesn't have\n" |
| 715 | " // a device overload (and isn't constexpr before C++11, naturally).\n" |
| 716 | " __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d);\n" |
| 717 | " __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c);\n" |
| 718 | " }\n" |
| 719 | " }\n" |
| 720 | " return z;\n" |
| 721 | "}\n" |
| 722 | "\n" |
| 723 | "extern \"C\" inline __device__ float _Complex __mulsc3(float __a, float __b,\n" |
| 724 | " float __c, float __d) {\n" |
| 725 | " float __ac = __a * __c;\n" |
| 726 | " float __bd = __b * __d;\n" |
| 727 | " float __ad = __a * __d;\n" |
| 728 | " float __bc = __b * __c;\n" |
| 729 | " float _Complex z;\n" |
| 730 | " __real__(z) = __ac - __bd;\n" |
| 731 | " __imag__(z) = __ad + __bc;\n" |
| 732 | " if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {\n" |
| 733 | " int __recalc = 0;\n" |
| 734 | " if (std::isinf(__a) || std::isinf(__b)) {\n" |
| 735 | " __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);\n" |
| 736 | " __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);\n" |
| 737 | " if (std::isnan(__c))\n" |
| 738 | " __c = std::copysign(0, __c);\n" |
| 739 | " if (std::isnan(__d))\n" |
| 740 | " __d = std::copysign(0, __d);\n" |
| 741 | " __recalc = 1;\n" |
| 742 | " }\n" |
| 743 | " if (std::isinf(__c) || std::isinf(__d)) {\n" |
| 744 | " __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);\n" |
| 745 | " __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);\n" |
| 746 | " if (std::isnan(__a))\n" |
| 747 | " __a = std::copysign(0, __a);\n" |
| 748 | " if (std::isnan(__b))\n" |
| 749 | " __b = std::copysign(0, __b);\n" |
| 750 | " __recalc = 1;\n" |
| 751 | " }\n" |
| 752 | " if (!__recalc && (std::isinf(__ac) || std::isinf(__bd) ||\n" |
| 753 | " std::isinf(__ad) || std::isinf(__bc))) {\n" |
| 754 | " if (std::isnan(__a))\n" |
| 755 | " __a = std::copysign(0, __a);\n" |
| 756 | " if (std::isnan(__b))\n" |
| 757 | " __b = std::copysign(0, __b);\n" |
| 758 | " if (std::isnan(__c))\n" |
| 759 | " __c = std::copysign(0, __c);\n" |
| 760 | " if (std::isnan(__d))\n" |
| 761 | " __d = std::copysign(0, __d);\n" |
| 762 | " __recalc = 1;\n" |
| 763 | " }\n" |
| 764 | " if (__recalc) {\n" |
| 765 | " __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d);\n" |
| 766 | " __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c);\n" |
| 767 | " }\n" |
| 768 | " }\n" |
| 769 | " return z;\n" |
| 770 | "}\n" |
| 771 | "\n" |
| 772 | "extern \"C\" inline __device__ double _Complex __divdc3(double __a, double __b,\n" |
| 773 | " double __c, double __d) {\n" |
| 774 | " int __ilogbw = 0;\n" |
| 775 | " // Can't use std::max, because that's defined in <algorithm>, and we don't\n" |
| 776 | " // want to pull that in for every compile. The CUDA headers define\n" |
| 777 | " // ::max(float, float) and ::max(double, double), which is sufficient for us.\n" |
| 778 | " double __logbw = std::logb(max(std::abs(__c), std::abs(__d)));\n" |
| 779 | " if (std::isfinite(__logbw)) {\n" |
| 780 | " __ilogbw = (int)__logbw;\n" |
| 781 | " __c = std::scalbn(__c, -__ilogbw);\n" |
| 782 | " __d = std::scalbn(__d, -__ilogbw);\n" |
| 783 | " }\n" |
| 784 | " double __denom = __c * __c + __d * __d;\n" |
| 785 | " double _Complex z;\n" |
| 786 | " __real__(z) = std::scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);\n" |
| 787 | " __imag__(z) = std::scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);\n" |
| 788 | " if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {\n" |
| 789 | " if ((__denom == 0.0) && (!std::isnan(__a) || !std::isnan(__b))) {\n" |
| 790 | " __real__(z) = std::copysign(__builtin_huge_valf(), __c) * __a;\n" |
| 791 | " __imag__(z) = std::copysign(__builtin_huge_valf(), __c) * __b;\n" |
| 792 | " } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) &&\n" |
| 793 | " std::isfinite(__d)) {\n" |
| 794 | " __a = std::copysign(std::isinf(__a) ? 1.0 : 0.0, __a);\n" |
| 795 | " __b = std::copysign(std::isinf(__b) ? 1.0 : 0.0, __b);\n" |
| 796 | " __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d);\n" |
| 797 | " __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d);\n" |
| 798 | " } else if (std::isinf(__logbw) && __logbw > 0.0 && std::isfinite(__a) &&\n" |
| 799 | " std::isfinite(__b)) {\n" |
| 800 | " __c = std::copysign(std::isinf(__c) ? 1.0 : 0.0, __c);\n" |
| 801 | " __d = std::copysign(std::isinf(__d) ? 1.0 : 0.0, __d);\n" |
| 802 | " __real__(z) = 0.0 * (__a * __c + __b * __d);\n" |
| 803 | " __imag__(z) = 0.0 * (__b * __c - __a * __d);\n" |
| 804 | " }\n" |
| 805 | " }\n" |
| 806 | " return z;\n" |
| 807 | "}\n" |
| 808 | "\n" |
| 809 | "extern \"C\" inline __device__ float _Complex __divsc3(float __a, float __b,\n" |
| 810 | " float __c, float __d) {\n" |
| 811 | " int __ilogbw = 0;\n" |
| 812 | " float __logbw = std::logb(max(std::abs(__c), std::abs(__d)));\n" |
| 813 | " if (std::isfinite(__logbw)) {\n" |
| 814 | " __ilogbw = (int)__logbw;\n" |
| 815 | " __c = std::scalbn(__c, -__ilogbw);\n" |
| 816 | " __d = std::scalbn(__d, -__ilogbw);\n" |
| 817 | " }\n" |
| 818 | " float __denom = __c * __c + __d * __d;\n" |
| 819 | " float _Complex z;\n" |
| 820 | " __real__(z) = std::scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);\n" |
| 821 | " __imag__(z) = std::scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);\n" |
| 822 | " if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {\n" |
| 823 | " if ((__denom == 0) && (!std::isnan(__a) || !std::isnan(__b))) {\n" |
| 824 | " __real__(z) = std::copysign(__builtin_huge_valf(), __c) * __a;\n" |
| 825 | " __imag__(z) = std::copysign(__builtin_huge_valf(), __c) * __b;\n" |
| 826 | " } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) &&\n" |
| 827 | " std::isfinite(__d)) {\n" |
| 828 | " __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);\n" |
| 829 | " __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);\n" |
| 830 | " __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d);\n" |
| 831 | " __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d);\n" |
| 832 | " } else if (std::isinf(__logbw) && __logbw > 0 && std::isfinite(__a) &&\n" |
| 833 | " std::isfinite(__b)) {\n" |
| 834 | " __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);\n" |
| 835 | " __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);\n" |
| 836 | " __real__(z) = 0 * (__a * __c + __b * __d);\n" |
| 837 | " __imag__(z) = 0 * (__b * __c - __a * __d);\n" |
| 838 | " }\n" |
| 839 | " }\n" |
| 840 | " return z;\n" |
| 841 | "}\n" |
| 842 | "\n" |
| 843 | "#endif // __CLANG_CUDA_COMPLEX_BUILTINS\n" |
| 844 | "" } , |
| 845 | { "/builtins/__clang_cuda_device_functions.h" , "/*===---- __clang_cuda_device_functions.h - CUDA runtime support -----------===\n" |
| 846 | " *\n" |
| 847 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 848 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 849 | " * in the Software without restriction, including without limitation the rights\n" |
| 850 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 851 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 852 | " * furnished to do so, subject to the following conditions:\n" |
| 853 | " *\n" |
| 854 | " * The above copyright notice and this permission notice shall be included in\n" |
| 855 | " * all copies or substantial portions of the Software.\n" |
| 856 | " *\n" |
| 857 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 858 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 859 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 860 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 861 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 862 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 863 | " * THE SOFTWARE.\n" |
| 864 | " *\n" |
| 865 | " *===-----------------------------------------------------------------------===\n" |
| 866 | " */\n" |
| 867 | "\n" |
| 868 | "#ifndef __CLANG_CUDA_DEVICE_FUNCTIONS_H__\n" |
| 869 | "#define __CLANG_CUDA_DEVICE_FUNCTIONS_H__\n" |
| 870 | "\n" |
| 871 | "#if CUDA_VERSION < 9000\n" |
| 872 | "#error This file is intended to be used with CUDA-9+ only.\n" |
| 873 | "#endif\n" |
| 874 | "\n" |
| 875 | "// __DEVICE__ is a helper macro with common set of attributes for the wrappers\n" |
| 876 | "// we implement in this file. We need static in order to avoid emitting unused\n" |
| 877 | "// functions and __forceinline__ helps inlining these wrappers at -O1.\n" |
| 878 | "#pragma push_macro(\"__DEVICE__\")\n" |
| 879 | "#define __DEVICE__ static __device__ __forceinline__\n" |
| 880 | "\n" |
| 881 | "// libdevice provides fast low precision and slow full-precision implementations\n" |
| 882 | "// for some functions. Which one gets selected depends on\n" |
| 883 | "// __CLANG_CUDA_APPROX_TRANSCENDENTALS__ which gets defined by clang if\n" |
| 884 | "// -ffast-math or -fcuda-approx-transcendentals are in effect.\n" |
| 885 | "#pragma push_macro(\"__FAST_OR_SLOW\")\n" |
| 886 | "#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)\n" |
| 887 | "#define __FAST_OR_SLOW(fast, slow) fast\n" |
| 888 | "#else\n" |
| 889 | "#define __FAST_OR_SLOW(fast, slow) slow\n" |
| 890 | "#endif\n" |
| 891 | "\n" |
| 892 | "__DEVICE__ int __all(int __a) { return __nvvm_vote_all(__a); }\n" |
| 893 | "__DEVICE__ int __any(int __a) { return __nvvm_vote_any(__a); }\n" |
| 894 | "__DEVICE__ unsigned int __ballot(int __a) { return __nvvm_vote_ballot(__a); }\n" |
| 895 | "__DEVICE__ unsigned int __brev(unsigned int __a) { return __nv_brev(__a); }\n" |
| 896 | "__DEVICE__ unsigned long long __brevll(unsigned long long __a) {\n" |
| 897 | " return __nv_brevll(__a);\n" |
| 898 | "}\n" |
| 899 | "__DEVICE__ void __brkpt() { asm volatile(\"brkpt;\"); }\n" |
| 900 | "__DEVICE__ void __brkpt(int __a) { __brkpt(); }\n" |
| 901 | "__DEVICE__ unsigned int __byte_perm(unsigned int __a, unsigned int __b,\n" |
| 902 | " unsigned int __c) {\n" |
| 903 | " return __nv_byte_perm(__a, __b, __c);\n" |
| 904 | "}\n" |
| 905 | "__DEVICE__ int __clz(int __a) { return __nv_clz(__a); }\n" |
| 906 | "__DEVICE__ int __clzll(long long __a) { return __nv_clzll(__a); }\n" |
| 907 | "__DEVICE__ float __cosf(float __a) { return __nv_fast_cosf(__a); }\n" |
| 908 | "__DEVICE__ double __dAtomicAdd(double *__p, double __v) {\n" |
| 909 | " return __nvvm_atom_add_gen_d(__p, __v);\n" |
| 910 | "}\n" |
| 911 | "__DEVICE__ double __dAtomicAdd_block(double *__p, double __v) {\n" |
| 912 | " return __nvvm_atom_cta_add_gen_d(__p, __v);\n" |
| 913 | "}\n" |
| 914 | "__DEVICE__ double __dAtomicAdd_system(double *__p, double __v) {\n" |
| 915 | " return __nvvm_atom_sys_add_gen_d(__p, __v);\n" |
| 916 | "}\n" |
| 917 | "__DEVICE__ double __dadd_rd(double __a, double __b) {\n" |
| 918 | " return __nv_dadd_rd(__a, __b);\n" |
| 919 | "}\n" |
| 920 | "__DEVICE__ double __dadd_rn(double __a, double __b) {\n" |
| 921 | " return __nv_dadd_rn(__a, __b);\n" |
| 922 | "}\n" |
| 923 | "__DEVICE__ double __dadd_ru(double __a, double __b) {\n" |
| 924 | " return __nv_dadd_ru(__a, __b);\n" |
| 925 | "}\n" |
| 926 | "__DEVICE__ double __dadd_rz(double __a, double __b) {\n" |
| 927 | " return __nv_dadd_rz(__a, __b);\n" |
| 928 | "}\n" |
| 929 | "__DEVICE__ double __ddiv_rd(double __a, double __b) {\n" |
| 930 | " return __nv_ddiv_rd(__a, __b);\n" |
| 931 | "}\n" |
| 932 | "__DEVICE__ double __ddiv_rn(double __a, double __b) {\n" |
| 933 | " return __nv_ddiv_rn(__a, __b);\n" |
| 934 | "}\n" |
| 935 | "__DEVICE__ double __ddiv_ru(double __a, double __b) {\n" |
| 936 | " return __nv_ddiv_ru(__a, __b);\n" |
| 937 | "}\n" |
| 938 | "__DEVICE__ double __ddiv_rz(double __a, double __b) {\n" |
| 939 | " return __nv_ddiv_rz(__a, __b);\n" |
| 940 | "}\n" |
| 941 | "__DEVICE__ double __dmul_rd(double __a, double __b) {\n" |
| 942 | " return __nv_dmul_rd(__a, __b);\n" |
| 943 | "}\n" |
| 944 | "__DEVICE__ double __dmul_rn(double __a, double __b) {\n" |
| 945 | " return __nv_dmul_rn(__a, __b);\n" |
| 946 | "}\n" |
| 947 | "__DEVICE__ double __dmul_ru(double __a, double __b) {\n" |
| 948 | " return __nv_dmul_ru(__a, __b);\n" |
| 949 | "}\n" |
| 950 | "__DEVICE__ double __dmul_rz(double __a, double __b) {\n" |
| 951 | " return __nv_dmul_rz(__a, __b);\n" |
| 952 | "}\n" |
| 953 | "__DEVICE__ float __double2float_rd(double __a) {\n" |
| 954 | " return __nv_double2float_rd(__a);\n" |
| 955 | "}\n" |
| 956 | "__DEVICE__ float __double2float_rn(double __a) {\n" |
| 957 | " return __nv_double2float_rn(__a);\n" |
| 958 | "}\n" |
| 959 | "__DEVICE__ float __double2float_ru(double __a) {\n" |
| 960 | " return __nv_double2float_ru(__a);\n" |
| 961 | "}\n" |
| 962 | "__DEVICE__ float __double2float_rz(double __a) {\n" |
| 963 | " return __nv_double2float_rz(__a);\n" |
| 964 | "}\n" |
| 965 | "__DEVICE__ int __double2hiint(double __a) { return __nv_double2hiint(__a); }\n" |
| 966 | "__DEVICE__ int __double2int_rd(double __a) { return __nv_double2int_rd(__a); }\n" |
| 967 | "__DEVICE__ int __double2int_rn(double __a) { return __nv_double2int_rn(__a); }\n" |
| 968 | "__DEVICE__ int __double2int_ru(double __a) { return __nv_double2int_ru(__a); }\n" |
| 969 | "__DEVICE__ int __double2int_rz(double __a) { return __nv_double2int_rz(__a); }\n" |
| 970 | "__DEVICE__ long long __double2ll_rd(double __a) {\n" |
| 971 | " return __nv_double2ll_rd(__a);\n" |
| 972 | "}\n" |
| 973 | "__DEVICE__ long long __double2ll_rn(double __a) {\n" |
| 974 | " return __nv_double2ll_rn(__a);\n" |
| 975 | "}\n" |
| 976 | "__DEVICE__ long long __double2ll_ru(double __a) {\n" |
| 977 | " return __nv_double2ll_ru(__a);\n" |
| 978 | "}\n" |
| 979 | "__DEVICE__ long long __double2ll_rz(double __a) {\n" |
| 980 | " return __nv_double2ll_rz(__a);\n" |
| 981 | "}\n" |
| 982 | "__DEVICE__ int __double2loint(double __a) { return __nv_double2loint(__a); }\n" |
| 983 | "__DEVICE__ unsigned int __double2uint_rd(double __a) {\n" |
| 984 | " return __nv_double2uint_rd(__a);\n" |
| 985 | "}\n" |
| 986 | "__DEVICE__ unsigned int __double2uint_rn(double __a) {\n" |
| 987 | " return __nv_double2uint_rn(__a);\n" |
| 988 | "}\n" |
| 989 | "__DEVICE__ unsigned int __double2uint_ru(double __a) {\n" |
| 990 | " return __nv_double2uint_ru(__a);\n" |
| 991 | "}\n" |
| 992 | "__DEVICE__ unsigned int __double2uint_rz(double __a) {\n" |
| 993 | " return __nv_double2uint_rz(__a);\n" |
| 994 | "}\n" |
| 995 | "__DEVICE__ unsigned long long __double2ull_rd(double __a) {\n" |
| 996 | " return __nv_double2ull_rd(__a);\n" |
| 997 | "}\n" |
| 998 | "__DEVICE__ unsigned long long __double2ull_rn(double __a) {\n" |
| 999 | " return __nv_double2ull_rn(__a);\n" |
| 1000 | "}\n" |
| 1001 | "__DEVICE__ unsigned long long __double2ull_ru(double __a) {\n" |
| 1002 | " return __nv_double2ull_ru(__a);\n" |
| 1003 | "}\n" |
| 1004 | "__DEVICE__ unsigned long long __double2ull_rz(double __a) {\n" |
| 1005 | " return __nv_double2ull_rz(__a);\n" |
| 1006 | "}\n" |
| 1007 | "__DEVICE__ long long __double_as_longlong(double __a) {\n" |
| 1008 | " return __nv_double_as_longlong(__a);\n" |
| 1009 | "}\n" |
| 1010 | "__DEVICE__ double __drcp_rd(double __a) { return __nv_drcp_rd(__a); }\n" |
| 1011 | "__DEVICE__ double __drcp_rn(double __a) { return __nv_drcp_rn(__a); }\n" |
| 1012 | "__DEVICE__ double __drcp_ru(double __a) { return __nv_drcp_ru(__a); }\n" |
| 1013 | "__DEVICE__ double __drcp_rz(double __a) { return __nv_drcp_rz(__a); }\n" |
| 1014 | "__DEVICE__ double __dsqrt_rd(double __a) { return __nv_dsqrt_rd(__a); }\n" |
| 1015 | "__DEVICE__ double __dsqrt_rn(double __a) { return __nv_dsqrt_rn(__a); }\n" |
| 1016 | "__DEVICE__ double __dsqrt_ru(double __a) { return __nv_dsqrt_ru(__a); }\n" |
| 1017 | "__DEVICE__ double __dsqrt_rz(double __a) { return __nv_dsqrt_rz(__a); }\n" |
| 1018 | "__DEVICE__ double __dsub_rd(double __a, double __b) {\n" |
| 1019 | " return __nv_dsub_rd(__a, __b);\n" |
| 1020 | "}\n" |
| 1021 | "__DEVICE__ double __dsub_rn(double __a, double __b) {\n" |
| 1022 | " return __nv_dsub_rn(__a, __b);\n" |
| 1023 | "}\n" |
| 1024 | "__DEVICE__ double __dsub_ru(double __a, double __b) {\n" |
| 1025 | " return __nv_dsub_ru(__a, __b);\n" |
| 1026 | "}\n" |
| 1027 | "__DEVICE__ double __dsub_rz(double __a, double __b) {\n" |
| 1028 | " return __nv_dsub_rz(__a, __b);\n" |
| 1029 | "}\n" |
| 1030 | "__DEVICE__ float __exp10f(float __a) { return __nv_fast_exp10f(__a); }\n" |
| 1031 | "__DEVICE__ float __expf(float __a) { return __nv_fast_expf(__a); }\n" |
| 1032 | "__DEVICE__ float __fAtomicAdd(float *__p, float __v) {\n" |
| 1033 | " return __nvvm_atom_add_gen_f(__p, __v);\n" |
| 1034 | "}\n" |
| 1035 | "__DEVICE__ float __fAtomicAdd_block(float *__p, float __v) {\n" |
| 1036 | " return __nvvm_atom_cta_add_gen_f(__p, __v);\n" |
| 1037 | "}\n" |
| 1038 | "__DEVICE__ float __fAtomicAdd_system(float *__p, float __v) {\n" |
| 1039 | " return __nvvm_atom_sys_add_gen_f(__p, __v);\n" |
| 1040 | "}\n" |
| 1041 | "__DEVICE__ float __fAtomicExch(float *__p, float __v) {\n" |
| 1042 | " return __nv_int_as_float(\n" |
| 1043 | " __nvvm_atom_xchg_gen_i((int *)__p, __nv_float_as_int(__v)));\n" |
| 1044 | "}\n" |
| 1045 | "__DEVICE__ float __fAtomicExch_block(float *__p, float __v) {\n" |
| 1046 | " return __nv_int_as_float(\n" |
| 1047 | " __nvvm_atom_cta_xchg_gen_i((int *)__p, __nv_float_as_int(__v)));\n" |
| 1048 | "}\n" |
| 1049 | "__DEVICE__ float __fAtomicExch_system(float *__p, float __v) {\n" |
| 1050 | " return __nv_int_as_float(\n" |
| 1051 | " __nvvm_atom_sys_xchg_gen_i((int *)__p, __nv_float_as_int(__v)));\n" |
| 1052 | "}\n" |
| 1053 | "__DEVICE__ float __fadd_rd(float __a, float __b) {\n" |
| 1054 | " return __nv_fadd_rd(__a, __b);\n" |
| 1055 | "}\n" |
| 1056 | "__DEVICE__ float __fadd_rn(float __a, float __b) {\n" |
| 1057 | " return __nv_fadd_rn(__a, __b);\n" |
| 1058 | "}\n" |
| 1059 | "__DEVICE__ float __fadd_ru(float __a, float __b) {\n" |
| 1060 | " return __nv_fadd_ru(__a, __b);\n" |
| 1061 | "}\n" |
| 1062 | "__DEVICE__ float __fadd_rz(float __a, float __b) {\n" |
| 1063 | " return __nv_fadd_rz(__a, __b);\n" |
| 1064 | "}\n" |
| 1065 | "__DEVICE__ float __fdiv_rd(float __a, float __b) {\n" |
| 1066 | " return __nv_fdiv_rd(__a, __b);\n" |
| 1067 | "}\n" |
| 1068 | "__DEVICE__ float __fdiv_rn(float __a, float __b) {\n" |
| 1069 | " return __nv_fdiv_rn(__a, __b);\n" |
| 1070 | "}\n" |
| 1071 | "__DEVICE__ float __fdiv_ru(float __a, float __b) {\n" |
| 1072 | " return __nv_fdiv_ru(__a, __b);\n" |
| 1073 | "}\n" |
| 1074 | "__DEVICE__ float __fdiv_rz(float __a, float __b) {\n" |
| 1075 | " return __nv_fdiv_rz(__a, __b);\n" |
| 1076 | "}\n" |
| 1077 | "__DEVICE__ float __fdividef(float __a, float __b) {\n" |
| 1078 | " return __nv_fast_fdividef(__a, __b);\n" |
| 1079 | "}\n" |
| 1080 | "__DEVICE__ int __ffs(int __a) { return __nv_ffs(__a); }\n" |
| 1081 | "__DEVICE__ int __ffsll(long long __a) { return __nv_ffsll(__a); }\n" |
| 1082 | "__DEVICE__ int __finite(double __a) { return __nv_isfinited(__a); }\n" |
| 1083 | "__DEVICE__ int __finitef(float __a) { return __nv_finitef(__a); }\n" |
| 1084 | "__DEVICE__ int __float2int_rd(float __a) { return __nv_float2int_rd(__a); }\n" |
| 1085 | "__DEVICE__ int __float2int_rn(float __a) { return __nv_float2int_rn(__a); }\n" |
| 1086 | "__DEVICE__ int __float2int_ru(float __a) { return __nv_float2int_ru(__a); }\n" |
| 1087 | "__DEVICE__ int __float2int_rz(float __a) { return __nv_float2int_rz(__a); }\n" |
| 1088 | "__DEVICE__ long long __float2ll_rd(float __a) { return __nv_float2ll_rd(__a); }\n" |
| 1089 | "__DEVICE__ long long __float2ll_rn(float __a) { return __nv_float2ll_rn(__a); }\n" |
| 1090 | "__DEVICE__ long long __float2ll_ru(float __a) { return __nv_float2ll_ru(__a); }\n" |
| 1091 | "__DEVICE__ long long __float2ll_rz(float __a) { return __nv_float2ll_rz(__a); }\n" |
| 1092 | "__DEVICE__ unsigned int __float2uint_rd(float __a) {\n" |
| 1093 | " return __nv_float2uint_rd(__a);\n" |
| 1094 | "}\n" |
| 1095 | "__DEVICE__ unsigned int __float2uint_rn(float __a) {\n" |
| 1096 | " return __nv_float2uint_rn(__a);\n" |
| 1097 | "}\n" |
| 1098 | "__DEVICE__ unsigned int __float2uint_ru(float __a) {\n" |
| 1099 | " return __nv_float2uint_ru(__a);\n" |
| 1100 | "}\n" |
| 1101 | "__DEVICE__ unsigned int __float2uint_rz(float __a) {\n" |
| 1102 | " return __nv_float2uint_rz(__a);\n" |
| 1103 | "}\n" |
| 1104 | "__DEVICE__ unsigned long long __float2ull_rd(float __a) {\n" |
| 1105 | " return __nv_float2ull_rd(__a);\n" |
| 1106 | "}\n" |
| 1107 | "__DEVICE__ unsigned long long __float2ull_rn(float __a) {\n" |
| 1108 | " return __nv_float2ull_rn(__a);\n" |
| 1109 | "}\n" |
| 1110 | "__DEVICE__ unsigned long long __float2ull_ru(float __a) {\n" |
| 1111 | " return __nv_float2ull_ru(__a);\n" |
| 1112 | "}\n" |
| 1113 | "__DEVICE__ unsigned long long __float2ull_rz(float __a) {\n" |
| 1114 | " return __nv_float2ull_rz(__a);\n" |
| 1115 | "}\n" |
| 1116 | "__DEVICE__ int __float_as_int(float __a) { return __nv_float_as_int(__a); }\n" |
| 1117 | "__DEVICE__ unsigned int __float_as_uint(float __a) {\n" |
| 1118 | " return __nv_float_as_uint(__a);\n" |
| 1119 | "}\n" |
| 1120 | "__DEVICE__ double __fma_rd(double __a, double __b, double __c) {\n" |
| 1121 | " return __nv_fma_rd(__a, __b, __c);\n" |
| 1122 | "}\n" |
| 1123 | "__DEVICE__ double __fma_rn(double __a, double __b, double __c) {\n" |
| 1124 | " return __nv_fma_rn(__a, __b, __c);\n" |
| 1125 | "}\n" |
| 1126 | "__DEVICE__ double __fma_ru(double __a, double __b, double __c) {\n" |
| 1127 | " return __nv_fma_ru(__a, __b, __c);\n" |
| 1128 | "}\n" |
| 1129 | "__DEVICE__ double __fma_rz(double __a, double __b, double __c) {\n" |
| 1130 | " return __nv_fma_rz(__a, __b, __c);\n" |
| 1131 | "}\n" |
| 1132 | "__DEVICE__ float __fmaf_ieee_rd(float __a, float __b, float __c) {\n" |
| 1133 | " return __nv_fmaf_ieee_rd(__a, __b, __c);\n" |
| 1134 | "}\n" |
| 1135 | "__DEVICE__ float __fmaf_ieee_rn(float __a, float __b, float __c) {\n" |
| 1136 | " return __nv_fmaf_ieee_rn(__a, __b, __c);\n" |
| 1137 | "}\n" |
| 1138 | "__DEVICE__ float __fmaf_ieee_ru(float __a, float __b, float __c) {\n" |
| 1139 | " return __nv_fmaf_ieee_ru(__a, __b, __c);\n" |
| 1140 | "}\n" |
| 1141 | "__DEVICE__ float __fmaf_ieee_rz(float __a, float __b, float __c) {\n" |
| 1142 | " return __nv_fmaf_ieee_rz(__a, __b, __c);\n" |
| 1143 | "}\n" |
| 1144 | "__DEVICE__ float __fmaf_rd(float __a, float __b, float __c) {\n" |
| 1145 | " return __nv_fmaf_rd(__a, __b, __c);\n" |
| 1146 | "}\n" |
| 1147 | "__DEVICE__ float __fmaf_rn(float __a, float __b, float __c) {\n" |
| 1148 | " return __nv_fmaf_rn(__a, __b, __c);\n" |
| 1149 | "}\n" |
| 1150 | "__DEVICE__ float __fmaf_ru(float __a, float __b, float __c) {\n" |
| 1151 | " return __nv_fmaf_ru(__a, __b, __c);\n" |
| 1152 | "}\n" |
| 1153 | "__DEVICE__ float __fmaf_rz(float __a, float __b, float __c) {\n" |
| 1154 | " return __nv_fmaf_rz(__a, __b, __c);\n" |
| 1155 | "}\n" |
| 1156 | "__DEVICE__ float __fmul_rd(float __a, float __b) {\n" |
| 1157 | " return __nv_fmul_rd(__a, __b);\n" |
| 1158 | "}\n" |
| 1159 | "__DEVICE__ float __fmul_rn(float __a, float __b) {\n" |
| 1160 | " return __nv_fmul_rn(__a, __b);\n" |
| 1161 | "}\n" |
| 1162 | "__DEVICE__ float __fmul_ru(float __a, float __b) {\n" |
| 1163 | " return __nv_fmul_ru(__a, __b);\n" |
| 1164 | "}\n" |
| 1165 | "__DEVICE__ float __fmul_rz(float __a, float __b) {\n" |
| 1166 | " return __nv_fmul_rz(__a, __b);\n" |
| 1167 | "}\n" |
| 1168 | "__DEVICE__ float __frcp_rd(float __a) { return __nv_frcp_rd(__a); }\n" |
| 1169 | "__DEVICE__ float __frcp_rn(float __a) { return __nv_frcp_rn(__a); }\n" |
| 1170 | "__DEVICE__ float __frcp_ru(float __a) { return __nv_frcp_ru(__a); }\n" |
| 1171 | "__DEVICE__ float __frcp_rz(float __a) { return __nv_frcp_rz(__a); }\n" |
| 1172 | "__DEVICE__ float __frsqrt_rn(float __a) { return __nv_frsqrt_rn(__a); }\n" |
| 1173 | "__DEVICE__ float __fsqrt_rd(float __a) { return __nv_fsqrt_rd(__a); }\n" |
| 1174 | "__DEVICE__ float __fsqrt_rn(float __a) { return __nv_fsqrt_rn(__a); }\n" |
| 1175 | "__DEVICE__ float __fsqrt_ru(float __a) { return __nv_fsqrt_ru(__a); }\n" |
| 1176 | "__DEVICE__ float __fsqrt_rz(float __a) { return __nv_fsqrt_rz(__a); }\n" |
| 1177 | "__DEVICE__ float __fsub_rd(float __a, float __b) {\n" |
| 1178 | " return __nv_fsub_rd(__a, __b);\n" |
| 1179 | "}\n" |
| 1180 | "__DEVICE__ float __fsub_rn(float __a, float __b) {\n" |
| 1181 | " return __nv_fsub_rn(__a, __b);\n" |
| 1182 | "}\n" |
| 1183 | "__DEVICE__ float __fsub_ru(float __a, float __b) {\n" |
| 1184 | " return __nv_fsub_ru(__a, __b);\n" |
| 1185 | "}\n" |
| 1186 | "__DEVICE__ float __fsub_rz(float __a, float __b) {\n" |
| 1187 | " return __nv_fsub_rz(__a, __b);\n" |
| 1188 | "}\n" |
| 1189 | "__DEVICE__ int __hadd(int __a, int __b) { return __nv_hadd(__a, __b); }\n" |
| 1190 | "__DEVICE__ double __hiloint2double(int __a, int __b) {\n" |
| 1191 | " return __nv_hiloint2double(__a, __b);\n" |
| 1192 | "}\n" |
| 1193 | "__DEVICE__ int __iAtomicAdd(int *__p, int __v) {\n" |
| 1194 | " return __nvvm_atom_add_gen_i(__p, __v);\n" |
| 1195 | "}\n" |
| 1196 | "__DEVICE__ int __iAtomicAdd_block(int *__p, int __v) {\n" |
| 1197 | " return __nvvm_atom_cta_add_gen_i(__p, __v);\n" |
| 1198 | "}\n" |
| 1199 | "__DEVICE__ int __iAtomicAdd_system(int *__p, int __v) {\n" |
| 1200 | " return __nvvm_atom_sys_add_gen_i(__p, __v);\n" |
| 1201 | "}\n" |
| 1202 | "__DEVICE__ int __iAtomicAnd(int *__p, int __v) {\n" |
| 1203 | " return __nvvm_atom_and_gen_i(__p, __v);\n" |
| 1204 | "}\n" |
| 1205 | "__DEVICE__ int __iAtomicAnd_block(int *__p, int __v) {\n" |
| 1206 | " return __nvvm_atom_cta_and_gen_i(__p, __v);\n" |
| 1207 | "}\n" |
| 1208 | "__DEVICE__ int __iAtomicAnd_system(int *__p, int __v) {\n" |
| 1209 | " return __nvvm_atom_sys_and_gen_i(__p, __v);\n" |
| 1210 | "}\n" |
| 1211 | "__DEVICE__ int __iAtomicCAS(int *__p, int __cmp, int __v) {\n" |
| 1212 | " return __nvvm_atom_cas_gen_i(__p, __cmp, __v);\n" |
| 1213 | "}\n" |
| 1214 | "__DEVICE__ int __iAtomicCAS_block(int *__p, int __cmp, int __v) {\n" |
| 1215 | " return __nvvm_atom_cta_cas_gen_i(__p, __cmp, __v);\n" |
| 1216 | "}\n" |
| 1217 | "__DEVICE__ int __iAtomicCAS_system(int *__p, int __cmp, int __v) {\n" |
| 1218 | " return __nvvm_atom_sys_cas_gen_i(__p, __cmp, __v);\n" |
| 1219 | "}\n" |
| 1220 | "__DEVICE__ int __iAtomicExch(int *__p, int __v) {\n" |
| 1221 | " return __nvvm_atom_xchg_gen_i(__p, __v);\n" |
| 1222 | "}\n" |
| 1223 | "__DEVICE__ int __iAtomicExch_block(int *__p, int __v) {\n" |
| 1224 | " return __nvvm_atom_cta_xchg_gen_i(__p, __v);\n" |
| 1225 | "}\n" |
| 1226 | "__DEVICE__ int __iAtomicExch_system(int *__p, int __v) {\n" |
| 1227 | " return __nvvm_atom_sys_xchg_gen_i(__p, __v);\n" |
| 1228 | "}\n" |
| 1229 | "__DEVICE__ int __iAtomicMax(int *__p, int __v) {\n" |
| 1230 | " return __nvvm_atom_max_gen_i(__p, __v);\n" |
| 1231 | "}\n" |
| 1232 | "__DEVICE__ int __iAtomicMax_block(int *__p, int __v) {\n" |
| 1233 | " return __nvvm_atom_cta_max_gen_i(__p, __v);\n" |
| 1234 | "}\n" |
| 1235 | "__DEVICE__ int __iAtomicMax_system(int *__p, int __v) {\n" |
| 1236 | " return __nvvm_atom_sys_max_gen_i(__p, __v);\n" |
| 1237 | "}\n" |
| 1238 | "__DEVICE__ int __iAtomicMin(int *__p, int __v) {\n" |
| 1239 | " return __nvvm_atom_min_gen_i(__p, __v);\n" |
| 1240 | "}\n" |
| 1241 | "__DEVICE__ int __iAtomicMin_block(int *__p, int __v) {\n" |
| 1242 | " return __nvvm_atom_cta_min_gen_i(__p, __v);\n" |
| 1243 | "}\n" |
| 1244 | "__DEVICE__ int __iAtomicMin_system(int *__p, int __v) {\n" |
| 1245 | " return __nvvm_atom_sys_min_gen_i(__p, __v);\n" |
| 1246 | "}\n" |
| 1247 | "__DEVICE__ int __iAtomicOr(int *__p, int __v) {\n" |
| 1248 | " return __nvvm_atom_or_gen_i(__p, __v);\n" |
| 1249 | "}\n" |
| 1250 | "__DEVICE__ int __iAtomicOr_block(int *__p, int __v) {\n" |
| 1251 | " return __nvvm_atom_cta_or_gen_i(__p, __v);\n" |
| 1252 | "}\n" |
| 1253 | "__DEVICE__ int __iAtomicOr_system(int *__p, int __v) {\n" |
| 1254 | " return __nvvm_atom_sys_or_gen_i(__p, __v);\n" |
| 1255 | "}\n" |
| 1256 | "__DEVICE__ int __iAtomicXor(int *__p, int __v) {\n" |
| 1257 | " return __nvvm_atom_xor_gen_i(__p, __v);\n" |
| 1258 | "}\n" |
| 1259 | "__DEVICE__ int __iAtomicXor_block(int *__p, int __v) {\n" |
| 1260 | " return __nvvm_atom_cta_xor_gen_i(__p, __v);\n" |
| 1261 | "}\n" |
| 1262 | "__DEVICE__ int __iAtomicXor_system(int *__p, int __v) {\n" |
| 1263 | " return __nvvm_atom_sys_xor_gen_i(__p, __v);\n" |
| 1264 | "}\n" |
| 1265 | "__DEVICE__ long long __illAtomicMax(long long *__p, long long __v) {\n" |
| 1266 | " return __nvvm_atom_max_gen_ll(__p, __v);\n" |
| 1267 | "}\n" |
| 1268 | "__DEVICE__ long long __illAtomicMax_block(long long *__p, long long __v) {\n" |
| 1269 | " return __nvvm_atom_cta_max_gen_ll(__p, __v);\n" |
| 1270 | "}\n" |
| 1271 | "__DEVICE__ long long __illAtomicMax_system(long long *__p, long long __v) {\n" |
| 1272 | " return __nvvm_atom_sys_max_gen_ll(__p, __v);\n" |
| 1273 | "}\n" |
| 1274 | "__DEVICE__ long long __illAtomicMin(long long *__p, long long __v) {\n" |
| 1275 | " return __nvvm_atom_min_gen_ll(__p, __v);\n" |
| 1276 | "}\n" |
| 1277 | "__DEVICE__ long long __illAtomicMin_block(long long *__p, long long __v) {\n" |
| 1278 | " return __nvvm_atom_cta_min_gen_ll(__p, __v);\n" |
| 1279 | "}\n" |
| 1280 | "__DEVICE__ long long __illAtomicMin_system(long long *__p, long long __v) {\n" |
| 1281 | " return __nvvm_atom_sys_min_gen_ll(__p, __v);\n" |
| 1282 | "}\n" |
| 1283 | "__DEVICE__ double __int2double_rn(int __a) { return __nv_int2double_rn(__a); }\n" |
| 1284 | "__DEVICE__ float __int2float_rd(int __a) { return __nv_int2float_rd(__a); }\n" |
| 1285 | "__DEVICE__ float __int2float_rn(int __a) { return __nv_int2float_rn(__a); }\n" |
| 1286 | "__DEVICE__ float __int2float_ru(int __a) { return __nv_int2float_ru(__a); }\n" |
| 1287 | "__DEVICE__ float __int2float_rz(int __a) { return __nv_int2float_rz(__a); }\n" |
| 1288 | "__DEVICE__ float __int_as_float(int __a) { return __nv_int_as_float(__a); }\n" |
| 1289 | "__DEVICE__ int __isfinited(double __a) { return __nv_isfinited(__a); }\n" |
| 1290 | "__DEVICE__ int __isinf(double __a) { return __nv_isinfd(__a); }\n" |
| 1291 | "__DEVICE__ int __isinff(float __a) { return __nv_isinff(__a); }\n" |
| 1292 | "__DEVICE__ int __isnan(double __a) { return __nv_isnand(__a); }\n" |
| 1293 | "__DEVICE__ int __isnanf(float __a) { return __nv_isnanf(__a); }\n" |
| 1294 | "__DEVICE__ double __ll2double_rd(long long __a) {\n" |
| 1295 | " return __nv_ll2double_rd(__a);\n" |
| 1296 | "}\n" |
| 1297 | "__DEVICE__ double __ll2double_rn(long long __a) {\n" |
| 1298 | " return __nv_ll2double_rn(__a);\n" |
| 1299 | "}\n" |
| 1300 | "__DEVICE__ double __ll2double_ru(long long __a) {\n" |
| 1301 | " return __nv_ll2double_ru(__a);\n" |
| 1302 | "}\n" |
| 1303 | "__DEVICE__ double __ll2double_rz(long long __a) {\n" |
| 1304 | " return __nv_ll2double_rz(__a);\n" |
| 1305 | "}\n" |
| 1306 | "__DEVICE__ float __ll2float_rd(long long __a) { return __nv_ll2float_rd(__a); }\n" |
| 1307 | "__DEVICE__ float __ll2float_rn(long long __a) { return __nv_ll2float_rn(__a); }\n" |
| 1308 | "__DEVICE__ float __ll2float_ru(long long __a) { return __nv_ll2float_ru(__a); }\n" |
| 1309 | "__DEVICE__ float __ll2float_rz(long long __a) { return __nv_ll2float_rz(__a); }\n" |
| 1310 | "__DEVICE__ long long __llAtomicAnd(long long *__p, long long __v) {\n" |
| 1311 | " return __nvvm_atom_and_gen_ll(__p, __v);\n" |
| 1312 | "}\n" |
| 1313 | "__DEVICE__ long long __llAtomicAnd_block(long long *__p, long long __v) {\n" |
| 1314 | " return __nvvm_atom_cta_and_gen_ll(__p, __v);\n" |
| 1315 | "}\n" |
| 1316 | "__DEVICE__ long long __llAtomicAnd_system(long long *__p, long long __v) {\n" |
| 1317 | " return __nvvm_atom_sys_and_gen_ll(__p, __v);\n" |
| 1318 | "}\n" |
| 1319 | "__DEVICE__ long long __llAtomicOr(long long *__p, long long __v) {\n" |
| 1320 | " return __nvvm_atom_or_gen_ll(__p, __v);\n" |
| 1321 | "}\n" |
| 1322 | "__DEVICE__ long long __llAtomicOr_block(long long *__p, long long __v) {\n" |
| 1323 | " return __nvvm_atom_cta_or_gen_ll(__p, __v);\n" |
| 1324 | "}\n" |
| 1325 | "__DEVICE__ long long __llAtomicOr_system(long long *__p, long long __v) {\n" |
| 1326 | " return __nvvm_atom_sys_or_gen_ll(__p, __v);\n" |
| 1327 | "}\n" |
| 1328 | "__DEVICE__ long long __llAtomicXor(long long *__p, long long __v) {\n" |
| 1329 | " return __nvvm_atom_xor_gen_ll(__p, __v);\n" |
| 1330 | "}\n" |
| 1331 | "__DEVICE__ long long __llAtomicXor_block(long long *__p, long long __v) {\n" |
| 1332 | " return __nvvm_atom_cta_xor_gen_ll(__p, __v);\n" |
| 1333 | "}\n" |
| 1334 | "__DEVICE__ long long __llAtomicXor_system(long long *__p, long long __v) {\n" |
| 1335 | " return __nvvm_atom_sys_xor_gen_ll(__p, __v);\n" |
| 1336 | "}\n" |
| 1337 | "__DEVICE__ float __log10f(float __a) { return __nv_fast_log10f(__a); }\n" |
| 1338 | "__DEVICE__ float __log2f(float __a) { return __nv_fast_log2f(__a); }\n" |
| 1339 | "__DEVICE__ float __logf(float __a) { return __nv_fast_logf(__a); }\n" |
| 1340 | "__DEVICE__ double __longlong_as_double(long long __a) {\n" |
| 1341 | " return __nv_longlong_as_double(__a);\n" |
| 1342 | "}\n" |
| 1343 | "__DEVICE__ int __mul24(int __a, int __b) { return __nv_mul24(__a, __b); }\n" |
| 1344 | "__DEVICE__ long long __mul64hi(long long __a, long long __b) {\n" |
| 1345 | " return __nv_mul64hi(__a, __b);\n" |
| 1346 | "}\n" |
| 1347 | "__DEVICE__ int __mulhi(int __a, int __b) { return __nv_mulhi(__a, __b); }\n" |
| 1348 | "__DEVICE__ unsigned int __pm0(void) { return __nvvm_read_ptx_sreg_pm0(); }\n" |
| 1349 | "__DEVICE__ unsigned int __pm1(void) { return __nvvm_read_ptx_sreg_pm1(); }\n" |
| 1350 | "__DEVICE__ unsigned int __pm2(void) { return __nvvm_read_ptx_sreg_pm2(); }\n" |
| 1351 | "__DEVICE__ unsigned int __pm3(void) { return __nvvm_read_ptx_sreg_pm3(); }\n" |
| 1352 | "__DEVICE__ int __popc(int __a) { return __nv_popc(__a); }\n" |
| 1353 | "__DEVICE__ int __popcll(long long __a) { return __nv_popcll(__a); }\n" |
| 1354 | "__DEVICE__ float __powf(float __a, float __b) {\n" |
| 1355 | " return __nv_fast_powf(__a, __b);\n" |
| 1356 | "}\n" |
| 1357 | "\n" |
| 1358 | "// Parameter must have a known integer value.\n" |
| 1359 | "#define __prof_trigger(__a) asm __volatile__(\"pmevent \\t%0;\" ::\"i\"(__a))\n" |
| 1360 | "__DEVICE__ int __rhadd(int __a, int __b) { return __nv_rhadd(__a, __b); }\n" |
| 1361 | "__DEVICE__ unsigned int __sad(int __a, int __b, unsigned int __c) {\n" |
| 1362 | " return __nv_sad(__a, __b, __c);\n" |
| 1363 | "}\n" |
| 1364 | "__DEVICE__ float __saturatef(float __a) { return __nv_saturatef(__a); }\n" |
| 1365 | "__DEVICE__ int __signbitd(double __a) { return __nv_signbitd(__a); }\n" |
| 1366 | "__DEVICE__ int __signbitf(float __a) { return __nv_signbitf(__a); }\n" |
| 1367 | "__DEVICE__ void __sincosf(float __a, float *__sptr, float *__cptr) {\n" |
| 1368 | " return __nv_fast_sincosf(__a, __sptr, __cptr);\n" |
| 1369 | "}\n" |
| 1370 | "__DEVICE__ float __sinf(float __a) { return __nv_fast_sinf(__a); }\n" |
| 1371 | "__DEVICE__ int __syncthreads_and(int __a) { return __nvvm_bar0_and(__a); }\n" |
| 1372 | "__DEVICE__ int __syncthreads_count(int __a) { return __nvvm_bar0_popc(__a); }\n" |
| 1373 | "__DEVICE__ int __syncthreads_or(int __a) { return __nvvm_bar0_or(__a); }\n" |
| 1374 | "__DEVICE__ float __tanf(float __a) { return __nv_fast_tanf(__a); }\n" |
| 1375 | "__DEVICE__ void __threadfence(void) { __nvvm_membar_gl(); }\n" |
| 1376 | "__DEVICE__ void __threadfence_block(void) { __nvvm_membar_cta(); };\n" |
| 1377 | "__DEVICE__ void __threadfence_system(void) { __nvvm_membar_sys(); };\n" |
| 1378 | "__DEVICE__ void __trap(void) { asm volatile(\"trap;\"); }\n" |
| 1379 | "__DEVICE__ unsigned int __uAtomicAdd(unsigned int *__p, unsigned int __v) {\n" |
| 1380 | " return __nvvm_atom_add_gen_i((int *)__p, __v);\n" |
| 1381 | "}\n" |
| 1382 | "__DEVICE__ unsigned int __uAtomicAdd_block(unsigned int *__p,\n" |
| 1383 | " unsigned int __v) {\n" |
| 1384 | " return __nvvm_atom_cta_add_gen_i((int *)__p, __v);\n" |
| 1385 | "}\n" |
| 1386 | "__DEVICE__ unsigned int __uAtomicAdd_system(unsigned int *__p,\n" |
| 1387 | " unsigned int __v) {\n" |
| 1388 | " return __nvvm_atom_sys_add_gen_i((int *)__p, __v);\n" |
| 1389 | "}\n" |
| 1390 | "__DEVICE__ unsigned int __uAtomicAnd(unsigned int *__p, unsigned int __v) {\n" |
| 1391 | " return __nvvm_atom_and_gen_i((int *)__p, __v);\n" |
| 1392 | "}\n" |
| 1393 | "__DEVICE__ unsigned int __uAtomicAnd_block(unsigned int *__p,\n" |
| 1394 | " unsigned int __v) {\n" |
| 1395 | " return __nvvm_atom_cta_and_gen_i((int *)__p, __v);\n" |
| 1396 | "}\n" |
| 1397 | "__DEVICE__ unsigned int __uAtomicAnd_system(unsigned int *__p,\n" |
| 1398 | " unsigned int __v) {\n" |
| 1399 | " return __nvvm_atom_sys_and_gen_i((int *)__p, __v);\n" |
| 1400 | "}\n" |
| 1401 | "__DEVICE__ unsigned int __uAtomicCAS(unsigned int *__p, unsigned int __cmp,\n" |
| 1402 | " unsigned int __v) {\n" |
| 1403 | " return __nvvm_atom_cas_gen_i((int *)__p, __cmp, __v);\n" |
| 1404 | "}\n" |
| 1405 | "__DEVICE__ unsigned int\n" |
| 1406 | "__uAtomicCAS_block(unsigned int *__p, unsigned int __cmp, unsigned int __v) {\n" |
| 1407 | " return __nvvm_atom_cta_cas_gen_i((int *)__p, __cmp, __v);\n" |
| 1408 | "}\n" |
| 1409 | "__DEVICE__ unsigned int\n" |
| 1410 | "__uAtomicCAS_system(unsigned int *__p, unsigned int __cmp, unsigned int __v) {\n" |
| 1411 | " return __nvvm_atom_sys_cas_gen_i((int *)__p, __cmp, __v);\n" |
| 1412 | "}\n" |
| 1413 | "__DEVICE__ unsigned int __uAtomicDec(unsigned int *__p, unsigned int __v) {\n" |
| 1414 | " return __nvvm_atom_dec_gen_ui(__p, __v);\n" |
| 1415 | "}\n" |
| 1416 | "__DEVICE__ unsigned int __uAtomicDec_block(unsigned int *__p,\n" |
| 1417 | " unsigned int __v) {\n" |
| 1418 | " return __nvvm_atom_cta_dec_gen_ui(__p, __v);\n" |
| 1419 | "}\n" |
| 1420 | "__DEVICE__ unsigned int __uAtomicDec_system(unsigned int *__p,\n" |
| 1421 | " unsigned int __v) {\n" |
| 1422 | " return __nvvm_atom_sys_dec_gen_ui(__p, __v);\n" |
| 1423 | "}\n" |
| 1424 | "__DEVICE__ unsigned int __uAtomicExch(unsigned int *__p, unsigned int __v) {\n" |
| 1425 | " return __nvvm_atom_xchg_gen_i((int *)__p, __v);\n" |
| 1426 | "}\n" |
| 1427 | "__DEVICE__ unsigned int __uAtomicExch_block(unsigned int *__p,\n" |
| 1428 | " unsigned int __v) {\n" |
| 1429 | " return __nvvm_atom_cta_xchg_gen_i((int *)__p, __v);\n" |
| 1430 | "}\n" |
| 1431 | "__DEVICE__ unsigned int __uAtomicExch_system(unsigned int *__p,\n" |
| 1432 | " unsigned int __v) {\n" |
| 1433 | " return __nvvm_atom_sys_xchg_gen_i((int *)__p, __v);\n" |
| 1434 | "}\n" |
| 1435 | "__DEVICE__ unsigned int __uAtomicInc(unsigned int *__p, unsigned int __v) {\n" |
| 1436 | " return __nvvm_atom_inc_gen_ui(__p, __v);\n" |
| 1437 | "}\n" |
| 1438 | "__DEVICE__ unsigned int __uAtomicInc_block(unsigned int *__p,\n" |
| 1439 | " unsigned int __v) {\n" |
| 1440 | " return __nvvm_atom_cta_inc_gen_ui(__p, __v);\n" |
| 1441 | "}\n" |
| 1442 | "__DEVICE__ unsigned int __uAtomicInc_system(unsigned int *__p,\n" |
| 1443 | " unsigned int __v) {\n" |
| 1444 | " return __nvvm_atom_sys_inc_gen_ui(__p, __v);\n" |
| 1445 | "}\n" |
| 1446 | "__DEVICE__ unsigned int __uAtomicMax(unsigned int *__p, unsigned int __v) {\n" |
| 1447 | " return __nvvm_atom_max_gen_ui(__p, __v);\n" |
| 1448 | "}\n" |
| 1449 | "__DEVICE__ unsigned int __uAtomicMax_block(unsigned int *__p,\n" |
| 1450 | " unsigned int __v) {\n" |
| 1451 | " return __nvvm_atom_cta_max_gen_ui(__p, __v);\n" |
| 1452 | "}\n" |
| 1453 | "__DEVICE__ unsigned int __uAtomicMax_system(unsigned int *__p,\n" |
| 1454 | " unsigned int __v) {\n" |
| 1455 | " return __nvvm_atom_sys_max_gen_ui(__p, __v);\n" |
| 1456 | "}\n" |
| 1457 | "__DEVICE__ unsigned int __uAtomicMin(unsigned int *__p, unsigned int __v) {\n" |
| 1458 | " return __nvvm_atom_min_gen_ui(__p, __v);\n" |
| 1459 | "}\n" |
| 1460 | "__DEVICE__ unsigned int __uAtomicMin_block(unsigned int *__p,\n" |
| 1461 | " unsigned int __v) {\n" |
| 1462 | " return __nvvm_atom_cta_min_gen_ui(__p, __v);\n" |
| 1463 | "}\n" |
| 1464 | "__DEVICE__ unsigned int __uAtomicMin_system(unsigned int *__p,\n" |
| 1465 | " unsigned int __v) {\n" |
| 1466 | " return __nvvm_atom_sys_min_gen_ui(__p, __v);\n" |
| 1467 | "}\n" |
| 1468 | "__DEVICE__ unsigned int __uAtomicOr(unsigned int *__p, unsigned int __v) {\n" |
| 1469 | " return __nvvm_atom_or_gen_i((int *)__p, __v);\n" |
| 1470 | "}\n" |
| 1471 | "__DEVICE__ unsigned int __uAtomicOr_block(unsigned int *__p, unsigned int __v) {\n" |
| 1472 | " return __nvvm_atom_cta_or_gen_i((int *)__p, __v);\n" |
| 1473 | "}\n" |
| 1474 | "__DEVICE__ unsigned int __uAtomicOr_system(unsigned int *__p,\n" |
| 1475 | " unsigned int __v) {\n" |
| 1476 | " return __nvvm_atom_sys_or_gen_i((int *)__p, __v);\n" |
| 1477 | "}\n" |
| 1478 | "__DEVICE__ unsigned int __uAtomicXor(unsigned int *__p, unsigned int __v) {\n" |
| 1479 | " return __nvvm_atom_xor_gen_i((int *)__p, __v);\n" |
| 1480 | "}\n" |
| 1481 | "__DEVICE__ unsigned int __uAtomicXor_block(unsigned int *__p,\n" |
| 1482 | " unsigned int __v) {\n" |
| 1483 | " return __nvvm_atom_cta_xor_gen_i((int *)__p, __v);\n" |
| 1484 | "}\n" |
| 1485 | "__DEVICE__ unsigned int __uAtomicXor_system(unsigned int *__p,\n" |
| 1486 | " unsigned int __v) {\n" |
| 1487 | " return __nvvm_atom_sys_xor_gen_i((int *)__p, __v);\n" |
| 1488 | "}\n" |
| 1489 | "__DEVICE__ unsigned int __uhadd(unsigned int __a, unsigned int __b) {\n" |
| 1490 | " return __nv_uhadd(__a, __b);\n" |
| 1491 | "}\n" |
| 1492 | "__DEVICE__ double __uint2double_rn(unsigned int __a) {\n" |
| 1493 | " return __nv_uint2double_rn(__a);\n" |
| 1494 | "}\n" |
| 1495 | "__DEVICE__ float __uint2float_rd(unsigned int __a) {\n" |
| 1496 | " return __nv_uint2float_rd(__a);\n" |
| 1497 | "}\n" |
| 1498 | "__DEVICE__ float __uint2float_rn(unsigned int __a) {\n" |
| 1499 | " return __nv_uint2float_rn(__a);\n" |
| 1500 | "}\n" |
| 1501 | "__DEVICE__ float __uint2float_ru(unsigned int __a) {\n" |
| 1502 | " return __nv_uint2float_ru(__a);\n" |
| 1503 | "}\n" |
| 1504 | "__DEVICE__ float __uint2float_rz(unsigned int __a) {\n" |
| 1505 | " return __nv_uint2float_rz(__a);\n" |
| 1506 | "}\n" |
| 1507 | "__DEVICE__ float __uint_as_float(unsigned int __a) {\n" |
| 1508 | " return __nv_uint_as_float(__a);\n" |
| 1509 | "} //\n" |
| 1510 | "__DEVICE__ double __ull2double_rd(unsigned long long __a) {\n" |
| 1511 | " return __nv_ull2double_rd(__a);\n" |
| 1512 | "}\n" |
| 1513 | "__DEVICE__ double __ull2double_rn(unsigned long long __a) {\n" |
| 1514 | " return __nv_ull2double_rn(__a);\n" |
| 1515 | "}\n" |
| 1516 | "__DEVICE__ double __ull2double_ru(unsigned long long __a) {\n" |
| 1517 | " return __nv_ull2double_ru(__a);\n" |
| 1518 | "}\n" |
| 1519 | "__DEVICE__ double __ull2double_rz(unsigned long long __a) {\n" |
| 1520 | " return __nv_ull2double_rz(__a);\n" |
| 1521 | "}\n" |
| 1522 | "__DEVICE__ float __ull2float_rd(unsigned long long __a) {\n" |
| 1523 | " return __nv_ull2float_rd(__a);\n" |
| 1524 | "}\n" |
| 1525 | "__DEVICE__ float __ull2float_rn(unsigned long long __a) {\n" |
| 1526 | " return __nv_ull2float_rn(__a);\n" |
| 1527 | "}\n" |
| 1528 | "__DEVICE__ float __ull2float_ru(unsigned long long __a) {\n" |
| 1529 | " return __nv_ull2float_ru(__a);\n" |
| 1530 | "}\n" |
| 1531 | "__DEVICE__ float __ull2float_rz(unsigned long long __a) {\n" |
| 1532 | " return __nv_ull2float_rz(__a);\n" |
| 1533 | "}\n" |
| 1534 | "__DEVICE__ unsigned long long __ullAtomicAdd(unsigned long long *__p,\n" |
| 1535 | " unsigned long long __v) {\n" |
| 1536 | " return __nvvm_atom_add_gen_ll((long long *)__p, __v);\n" |
| 1537 | "}\n" |
| 1538 | "__DEVICE__ unsigned long long __ullAtomicAdd_block(unsigned long long *__p,\n" |
| 1539 | " unsigned long long __v) {\n" |
| 1540 | " return __nvvm_atom_cta_add_gen_ll((long long *)__p, __v);\n" |
| 1541 | "}\n" |
| 1542 | "__DEVICE__ unsigned long long __ullAtomicAdd_system(unsigned long long *__p,\n" |
| 1543 | " unsigned long long __v) {\n" |
| 1544 | " return __nvvm_atom_sys_add_gen_ll((long long *)__p, __v);\n" |
| 1545 | "}\n" |
| 1546 | "__DEVICE__ unsigned long long __ullAtomicAnd(unsigned long long *__p,\n" |
| 1547 | " unsigned long long __v) {\n" |
| 1548 | " return __nvvm_atom_and_gen_ll((long long *)__p, __v);\n" |
| 1549 | "}\n" |
| 1550 | "__DEVICE__ unsigned long long __ullAtomicAnd_block(unsigned long long *__p,\n" |
| 1551 | " unsigned long long __v) {\n" |
| 1552 | " return __nvvm_atom_cta_and_gen_ll((long long *)__p, __v);\n" |
| 1553 | "}\n" |
| 1554 | "__DEVICE__ unsigned long long __ullAtomicAnd_system(unsigned long long *__p,\n" |
| 1555 | " unsigned long long __v) {\n" |
| 1556 | " return __nvvm_atom_sys_and_gen_ll((long long *)__p, __v);\n" |
| 1557 | "}\n" |
| 1558 | "__DEVICE__ unsigned long long __ullAtomicCAS(unsigned long long *__p,\n" |
| 1559 | " unsigned long long __cmp,\n" |
| 1560 | " unsigned long long __v) {\n" |
| 1561 | " return __nvvm_atom_cas_gen_ll((long long *)__p, __cmp, __v);\n" |
| 1562 | "}\n" |
| 1563 | "__DEVICE__ unsigned long long __ullAtomicCAS_block(unsigned long long *__p,\n" |
| 1564 | " unsigned long long __cmp,\n" |
| 1565 | " unsigned long long __v) {\n" |
| 1566 | " return __nvvm_atom_cta_cas_gen_ll((long long *)__p, __cmp, __v);\n" |
| 1567 | "}\n" |
| 1568 | "__DEVICE__ unsigned long long __ullAtomicCAS_system(unsigned long long *__p,\n" |
| 1569 | " unsigned long long __cmp,\n" |
| 1570 | " unsigned long long __v) {\n" |
| 1571 | " return __nvvm_atom_sys_cas_gen_ll((long long *)__p, __cmp, __v);\n" |
| 1572 | "}\n" |
| 1573 | "__DEVICE__ unsigned long long __ullAtomicExch(unsigned long long *__p,\n" |
| 1574 | " unsigned long long __v) {\n" |
| 1575 | " return __nvvm_atom_xchg_gen_ll((long long *)__p, __v);\n" |
| 1576 | "}\n" |
| 1577 | "__DEVICE__ unsigned long long __ullAtomicExch_block(unsigned long long *__p,\n" |
| 1578 | " unsigned long long __v) {\n" |
| 1579 | " return __nvvm_atom_cta_xchg_gen_ll((long long *)__p, __v);\n" |
| 1580 | "}\n" |
| 1581 | "__DEVICE__ unsigned long long __ullAtomicExch_system(unsigned long long *__p,\n" |
| 1582 | " unsigned long long __v) {\n" |
| 1583 | " return __nvvm_atom_sys_xchg_gen_ll((long long *)__p, __v);\n" |
| 1584 | "}\n" |
| 1585 | "__DEVICE__ unsigned long long __ullAtomicMax(unsigned long long *__p,\n" |
| 1586 | " unsigned long long __v) {\n" |
| 1587 | " return __nvvm_atom_max_gen_ull(__p, __v);\n" |
| 1588 | "}\n" |
| 1589 | "__DEVICE__ unsigned long long __ullAtomicMax_block(unsigned long long *__p,\n" |
| 1590 | " unsigned long long __v) {\n" |
| 1591 | " return __nvvm_atom_cta_max_gen_ull(__p, __v);\n" |
| 1592 | "}\n" |
| 1593 | "__DEVICE__ unsigned long long __ullAtomicMax_system(unsigned long long *__p,\n" |
| 1594 | " unsigned long long __v) {\n" |
| 1595 | " return __nvvm_atom_sys_max_gen_ull(__p, __v);\n" |
| 1596 | "}\n" |
| 1597 | "__DEVICE__ unsigned long long __ullAtomicMin(unsigned long long *__p,\n" |
| 1598 | " unsigned long long __v) {\n" |
| 1599 | " return __nvvm_atom_min_gen_ull(__p, __v);\n" |
| 1600 | "}\n" |
| 1601 | "__DEVICE__ unsigned long long __ullAtomicMin_block(unsigned long long *__p,\n" |
| 1602 | " unsigned long long __v) {\n" |
| 1603 | " return __nvvm_atom_cta_min_gen_ull(__p, __v);\n" |
| 1604 | "}\n" |
| 1605 | "__DEVICE__ unsigned long long __ullAtomicMin_system(unsigned long long *__p,\n" |
| 1606 | " unsigned long long __v) {\n" |
| 1607 | " return __nvvm_atom_sys_min_gen_ull(__p, __v);\n" |
| 1608 | "}\n" |
| 1609 | "__DEVICE__ unsigned long long __ullAtomicOr(unsigned long long *__p,\n" |
| 1610 | " unsigned long long __v) {\n" |
| 1611 | " return __nvvm_atom_or_gen_ll((long long *)__p, __v);\n" |
| 1612 | "}\n" |
| 1613 | "__DEVICE__ unsigned long long __ullAtomicOr_block(unsigned long long *__p,\n" |
| 1614 | " unsigned long long __v) {\n" |
| 1615 | " return __nvvm_atom_cta_or_gen_ll((long long *)__p, __v);\n" |
| 1616 | "}\n" |
| 1617 | "__DEVICE__ unsigned long long __ullAtomicOr_system(unsigned long long *__p,\n" |
| 1618 | " unsigned long long __v) {\n" |
| 1619 | " return __nvvm_atom_sys_or_gen_ll((long long *)__p, __v);\n" |
| 1620 | "}\n" |
| 1621 | "__DEVICE__ unsigned long long __ullAtomicXor(unsigned long long *__p,\n" |
| 1622 | " unsigned long long __v) {\n" |
| 1623 | " return __nvvm_atom_xor_gen_ll((long long *)__p, __v);\n" |
| 1624 | "}\n" |
| 1625 | "__DEVICE__ unsigned long long __ullAtomicXor_block(unsigned long long *__p,\n" |
| 1626 | " unsigned long long __v) {\n" |
| 1627 | " return __nvvm_atom_cta_xor_gen_ll((long long *)__p, __v);\n" |
| 1628 | "}\n" |
| 1629 | "__DEVICE__ unsigned long long __ullAtomicXor_system(unsigned long long *__p,\n" |
| 1630 | " unsigned long long __v) {\n" |
| 1631 | " return __nvvm_atom_sys_xor_gen_ll((long long *)__p, __v);\n" |
| 1632 | "}\n" |
| 1633 | "__DEVICE__ unsigned int __umul24(unsigned int __a, unsigned int __b) {\n" |
| 1634 | " return __nv_umul24(__a, __b);\n" |
| 1635 | "}\n" |
| 1636 | "__DEVICE__ unsigned long long __umul64hi(unsigned long long __a,\n" |
| 1637 | " unsigned long long __b) {\n" |
| 1638 | " return __nv_umul64hi(__a, __b);\n" |
| 1639 | "}\n" |
| 1640 | "__DEVICE__ unsigned int __umulhi(unsigned int __a, unsigned int __b) {\n" |
| 1641 | " return __nv_umulhi(__a, __b);\n" |
| 1642 | "}\n" |
| 1643 | "__DEVICE__ unsigned int __urhadd(unsigned int __a, unsigned int __b) {\n" |
| 1644 | " return __nv_urhadd(__a, __b);\n" |
| 1645 | "}\n" |
| 1646 | "__DEVICE__ unsigned int __usad(unsigned int __a, unsigned int __b,\n" |
| 1647 | " unsigned int __c) {\n" |
| 1648 | " return __nv_usad(__a, __b, __c);\n" |
| 1649 | "}\n" |
| 1650 | "\n" |
| 1651 | "#if CUDA_VERSION >= 9000 && CUDA_VERSION < 9020\n" |
| 1652 | "__DEVICE__ unsigned int __vabs2(unsigned int __a) { return __nv_vabs2(__a); }\n" |
| 1653 | "__DEVICE__ unsigned int __vabs4(unsigned int __a) { return __nv_vabs4(__a); }\n" |
| 1654 | "__DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) {\n" |
| 1655 | " return __nv_vabsdiffs2(__a, __b);\n" |
| 1656 | "}\n" |
| 1657 | "__DEVICE__ unsigned int __vabsdiffs4(unsigned int __a, unsigned int __b) {\n" |
| 1658 | " return __nv_vabsdiffs4(__a, __b);\n" |
| 1659 | "}\n" |
| 1660 | "__DEVICE__ unsigned int __vabsdiffu2(unsigned int __a, unsigned int __b) {\n" |
| 1661 | " return __nv_vabsdiffu2(__a, __b);\n" |
| 1662 | "}\n" |
| 1663 | "__DEVICE__ unsigned int __vabsdiffu4(unsigned int __a, unsigned int __b) {\n" |
| 1664 | " return __nv_vabsdiffu4(__a, __b);\n" |
| 1665 | "}\n" |
| 1666 | "__DEVICE__ unsigned int __vabsss2(unsigned int __a) {\n" |
| 1667 | " return __nv_vabsss2(__a);\n" |
| 1668 | "}\n" |
| 1669 | "__DEVICE__ unsigned int __vabsss4(unsigned int __a) {\n" |
| 1670 | " return __nv_vabsss4(__a);\n" |
| 1671 | "}\n" |
| 1672 | "__DEVICE__ unsigned int __vadd2(unsigned int __a, unsigned int __b) {\n" |
| 1673 | " return __nv_vadd2(__a, __b);\n" |
| 1674 | "}\n" |
| 1675 | "__DEVICE__ unsigned int __vadd4(unsigned int __a, unsigned int __b) {\n" |
| 1676 | " return __nv_vadd4(__a, __b);\n" |
| 1677 | "}\n" |
| 1678 | "__DEVICE__ unsigned int __vaddss2(unsigned int __a, unsigned int __b) {\n" |
| 1679 | " return __nv_vaddss2(__a, __b);\n" |
| 1680 | "}\n" |
| 1681 | "__DEVICE__ unsigned int __vaddss4(unsigned int __a, unsigned int __b) {\n" |
| 1682 | " return __nv_vaddss4(__a, __b);\n" |
| 1683 | "}\n" |
| 1684 | "__DEVICE__ unsigned int __vaddus2(unsigned int __a, unsigned int __b) {\n" |
| 1685 | " return __nv_vaddus2(__a, __b);\n" |
| 1686 | "}\n" |
| 1687 | "__DEVICE__ unsigned int __vaddus4(unsigned int __a, unsigned int __b) {\n" |
| 1688 | " return __nv_vaddus4(__a, __b);\n" |
| 1689 | "}\n" |
| 1690 | "__DEVICE__ unsigned int __vavgs2(unsigned int __a, unsigned int __b) {\n" |
| 1691 | " return __nv_vavgs2(__a, __b);\n" |
| 1692 | "}\n" |
| 1693 | "__DEVICE__ unsigned int __vavgs4(unsigned int __a, unsigned int __b) {\n" |
| 1694 | " return __nv_vavgs4(__a, __b);\n" |
| 1695 | "}\n" |
| 1696 | "__DEVICE__ unsigned int __vavgu2(unsigned int __a, unsigned int __b) {\n" |
| 1697 | " return __nv_vavgu2(__a, __b);\n" |
| 1698 | "}\n" |
| 1699 | "__DEVICE__ unsigned int __vavgu4(unsigned int __a, unsigned int __b) {\n" |
| 1700 | " return __nv_vavgu4(__a, __b);\n" |
| 1701 | "}\n" |
| 1702 | "__DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) {\n" |
| 1703 | " return __nv_vcmpeq2(__a, __b);\n" |
| 1704 | "}\n" |
| 1705 | "__DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) {\n" |
| 1706 | " return __nv_vcmpeq4(__a, __b);\n" |
| 1707 | "}\n" |
| 1708 | "__DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) {\n" |
| 1709 | " return __nv_vcmpges2(__a, __b);\n" |
| 1710 | "}\n" |
| 1711 | "__DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) {\n" |
| 1712 | " return __nv_vcmpges4(__a, __b);\n" |
| 1713 | "}\n" |
| 1714 | "__DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) {\n" |
| 1715 | " return __nv_vcmpgeu2(__a, __b);\n" |
| 1716 | "}\n" |
| 1717 | "__DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) {\n" |
| 1718 | " return __nv_vcmpgeu4(__a, __b);\n" |
| 1719 | "}\n" |
| 1720 | "__DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) {\n" |
| 1721 | " return __nv_vcmpgts2(__a, __b);\n" |
| 1722 | "}\n" |
| 1723 | "__DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) {\n" |
| 1724 | " return __nv_vcmpgts4(__a, __b);\n" |
| 1725 | "}\n" |
| 1726 | "__DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) {\n" |
| 1727 | " return __nv_vcmpgtu2(__a, __b);\n" |
| 1728 | "}\n" |
| 1729 | "__DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) {\n" |
| 1730 | " return __nv_vcmpgtu4(__a, __b);\n" |
| 1731 | "}\n" |
| 1732 | "__DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) {\n" |
| 1733 | " return __nv_vcmples2(__a, __b);\n" |
| 1734 | "}\n" |
| 1735 | "__DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) {\n" |
| 1736 | " return __nv_vcmples4(__a, __b);\n" |
| 1737 | "}\n" |
| 1738 | "__DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) {\n" |
| 1739 | " return __nv_vcmpleu2(__a, __b);\n" |
| 1740 | "}\n" |
| 1741 | "__DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) {\n" |
| 1742 | " return __nv_vcmpleu4(__a, __b);\n" |
| 1743 | "}\n" |
| 1744 | "__DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) {\n" |
| 1745 | " return __nv_vcmplts2(__a, __b);\n" |
| 1746 | "}\n" |
| 1747 | "__DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) {\n" |
| 1748 | " return __nv_vcmplts4(__a, __b);\n" |
| 1749 | "}\n" |
| 1750 | "__DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) {\n" |
| 1751 | " return __nv_vcmpltu2(__a, __b);\n" |
| 1752 | "}\n" |
| 1753 | "__DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) {\n" |
| 1754 | " return __nv_vcmpltu4(__a, __b);\n" |
| 1755 | "}\n" |
| 1756 | "__DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) {\n" |
| 1757 | " return __nv_vcmpne2(__a, __b);\n" |
| 1758 | "}\n" |
| 1759 | "__DEVICE__ unsigned int __vcmpne4(unsigned int __a, unsigned int __b) {\n" |
| 1760 | " return __nv_vcmpne4(__a, __b);\n" |
| 1761 | "}\n" |
| 1762 | "__DEVICE__ unsigned int __vhaddu2(unsigned int __a, unsigned int __b) {\n" |
| 1763 | " return __nv_vhaddu2(__a, __b);\n" |
| 1764 | "}\n" |
| 1765 | "__DEVICE__ unsigned int __vhaddu4(unsigned int __a, unsigned int __b) {\n" |
| 1766 | " return __nv_vhaddu4(__a, __b);\n" |
| 1767 | "}\n" |
| 1768 | "__DEVICE__ unsigned int __vmaxs2(unsigned int __a, unsigned int __b) {\n" |
| 1769 | " return __nv_vmaxs2(__a, __b);\n" |
| 1770 | "}\n" |
| 1771 | "__DEVICE__ unsigned int __vmaxs4(unsigned int __a, unsigned int __b) {\n" |
| 1772 | " return __nv_vmaxs4(__a, __b);\n" |
| 1773 | "}\n" |
| 1774 | "__DEVICE__ unsigned int __vmaxu2(unsigned int __a, unsigned int __b) {\n" |
| 1775 | " return __nv_vmaxu2(__a, __b);\n" |
| 1776 | "}\n" |
| 1777 | "__DEVICE__ unsigned int __vmaxu4(unsigned int __a, unsigned int __b) {\n" |
| 1778 | " return __nv_vmaxu4(__a, __b);\n" |
| 1779 | "}\n" |
| 1780 | "__DEVICE__ unsigned int __vmins2(unsigned int __a, unsigned int __b) {\n" |
| 1781 | " return __nv_vmins2(__a, __b);\n" |
| 1782 | "}\n" |
| 1783 | "__DEVICE__ unsigned int __vmins4(unsigned int __a, unsigned int __b) {\n" |
| 1784 | " return __nv_vmins4(__a, __b);\n" |
| 1785 | "}\n" |
| 1786 | "__DEVICE__ unsigned int __vminu2(unsigned int __a, unsigned int __b) {\n" |
| 1787 | " return __nv_vminu2(__a, __b);\n" |
| 1788 | "}\n" |
| 1789 | "__DEVICE__ unsigned int __vminu4(unsigned int __a, unsigned int __b) {\n" |
| 1790 | " return __nv_vminu4(__a, __b);\n" |
| 1791 | "}\n" |
| 1792 | "__DEVICE__ unsigned int __vneg2(unsigned int __a) { return __nv_vneg2(__a); }\n" |
| 1793 | "__DEVICE__ unsigned int __vneg4(unsigned int __a) { return __nv_vneg4(__a); }\n" |
| 1794 | "__DEVICE__ unsigned int __vnegss2(unsigned int __a) {\n" |
| 1795 | " return __nv_vnegss2(__a);\n" |
| 1796 | "}\n" |
| 1797 | "__DEVICE__ unsigned int __vnegss4(unsigned int __a) {\n" |
| 1798 | " return __nv_vnegss4(__a);\n" |
| 1799 | "}\n" |
| 1800 | "__DEVICE__ unsigned int __vsads2(unsigned int __a, unsigned int __b) {\n" |
| 1801 | " return __nv_vsads2(__a, __b);\n" |
| 1802 | "}\n" |
| 1803 | "__DEVICE__ unsigned int __vsads4(unsigned int __a, unsigned int __b) {\n" |
| 1804 | " return __nv_vsads4(__a, __b);\n" |
| 1805 | "}\n" |
| 1806 | "__DEVICE__ unsigned int __vsadu2(unsigned int __a, unsigned int __b) {\n" |
| 1807 | " return __nv_vsadu2(__a, __b);\n" |
| 1808 | "}\n" |
| 1809 | "__DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b) {\n" |
| 1810 | " return __nv_vsadu4(__a, __b);\n" |
| 1811 | "}\n" |
| 1812 | "__DEVICE__ unsigned int __vseteq2(unsigned int __a, unsigned int __b) {\n" |
| 1813 | " return __nv_vseteq2(__a, __b);\n" |
| 1814 | "}\n" |
| 1815 | "__DEVICE__ unsigned int __vseteq4(unsigned int __a, unsigned int __b) {\n" |
| 1816 | " return __nv_vseteq4(__a, __b);\n" |
| 1817 | "}\n" |
| 1818 | "__DEVICE__ unsigned int __vsetges2(unsigned int __a, unsigned int __b) {\n" |
| 1819 | " return __nv_vsetges2(__a, __b);\n" |
| 1820 | "}\n" |
| 1821 | "__DEVICE__ unsigned int __vsetges4(unsigned int __a, unsigned int __b) {\n" |
| 1822 | " return __nv_vsetges4(__a, __b);\n" |
| 1823 | "}\n" |
| 1824 | "__DEVICE__ unsigned int __vsetgeu2(unsigned int __a, unsigned int __b) {\n" |
| 1825 | " return __nv_vsetgeu2(__a, __b);\n" |
| 1826 | "}\n" |
| 1827 | "__DEVICE__ unsigned int __vsetgeu4(unsigned int __a, unsigned int __b) {\n" |
| 1828 | " return __nv_vsetgeu4(__a, __b);\n" |
| 1829 | "}\n" |
| 1830 | "__DEVICE__ unsigned int __vsetgts2(unsigned int __a, unsigned int __b) {\n" |
| 1831 | " return __nv_vsetgts2(__a, __b);\n" |
| 1832 | "}\n" |
| 1833 | "__DEVICE__ unsigned int __vsetgts4(unsigned int __a, unsigned int __b) {\n" |
| 1834 | " return __nv_vsetgts4(__a, __b);\n" |
| 1835 | "}\n" |
| 1836 | "__DEVICE__ unsigned int __vsetgtu2(unsigned int __a, unsigned int __b) {\n" |
| 1837 | " return __nv_vsetgtu2(__a, __b);\n" |
| 1838 | "}\n" |
| 1839 | "__DEVICE__ unsigned int __vsetgtu4(unsigned int __a, unsigned int __b) {\n" |
| 1840 | " return __nv_vsetgtu4(__a, __b);\n" |
| 1841 | "}\n" |
| 1842 | "__DEVICE__ unsigned int __vsetles2(unsigned int __a, unsigned int __b) {\n" |
| 1843 | " return __nv_vsetles2(__a, __b);\n" |
| 1844 | "}\n" |
| 1845 | "__DEVICE__ unsigned int __vsetles4(unsigned int __a, unsigned int __b) {\n" |
| 1846 | " return __nv_vsetles4(__a, __b);\n" |
| 1847 | "}\n" |
| 1848 | "__DEVICE__ unsigned int __vsetleu2(unsigned int __a, unsigned int __b) {\n" |
| 1849 | " return __nv_vsetleu2(__a, __b);\n" |
| 1850 | "}\n" |
| 1851 | "__DEVICE__ unsigned int __vsetleu4(unsigned int __a, unsigned int __b) {\n" |
| 1852 | " return __nv_vsetleu4(__a, __b);\n" |
| 1853 | "}\n" |
| 1854 | "__DEVICE__ unsigned int __vsetlts2(unsigned int __a, unsigned int __b) {\n" |
| 1855 | " return __nv_vsetlts2(__a, __b);\n" |
| 1856 | "}\n" |
| 1857 | "__DEVICE__ unsigned int __vsetlts4(unsigned int __a, unsigned int __b) {\n" |
| 1858 | " return __nv_vsetlts4(__a, __b);\n" |
| 1859 | "}\n" |
| 1860 | "__DEVICE__ unsigned int __vsetltu2(unsigned int __a, unsigned int __b) {\n" |
| 1861 | " return __nv_vsetltu2(__a, __b);\n" |
| 1862 | "}\n" |
| 1863 | "__DEVICE__ unsigned int __vsetltu4(unsigned int __a, unsigned int __b) {\n" |
| 1864 | " return __nv_vsetltu4(__a, __b);\n" |
| 1865 | "}\n" |
| 1866 | "__DEVICE__ unsigned int __vsetne2(unsigned int __a, unsigned int __b) {\n" |
| 1867 | " return __nv_vsetne2(__a, __b);\n" |
| 1868 | "}\n" |
| 1869 | "__DEVICE__ unsigned int __vsetne4(unsigned int __a, unsigned int __b) {\n" |
| 1870 | " return __nv_vsetne4(__a, __b);\n" |
| 1871 | "}\n" |
| 1872 | "__DEVICE__ unsigned int __vsub2(unsigned int __a, unsigned int __b) {\n" |
| 1873 | " return __nv_vsub2(__a, __b);\n" |
| 1874 | "}\n" |
| 1875 | "__DEVICE__ unsigned int __vsub4(unsigned int __a, unsigned int __b) {\n" |
| 1876 | " return __nv_vsub4(__a, __b);\n" |
| 1877 | "}\n" |
| 1878 | "__DEVICE__ unsigned int __vsubss2(unsigned int __a, unsigned int __b) {\n" |
| 1879 | " return __nv_vsubss2(__a, __b);\n" |
| 1880 | "}\n" |
| 1881 | "__DEVICE__ unsigned int __vsubss4(unsigned int __a, unsigned int __b) {\n" |
| 1882 | " return __nv_vsubss4(__a, __b);\n" |
| 1883 | "}\n" |
| 1884 | "__DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b) {\n" |
| 1885 | " return __nv_vsubus2(__a, __b);\n" |
| 1886 | "}\n" |
| 1887 | "__DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) {\n" |
| 1888 | " return __nv_vsubus4(__a, __b);\n" |
| 1889 | "}\n" |
| 1890 | "#else // CUDA_VERSION >= 9020\n" |
| 1891 | "// CUDA no longer provides inline assembly (or bitcode) implementation of these\n" |
| 1892 | "// functions, so we have to reimplment them. The implementation is naive and is\n" |
| 1893 | "// not optimized for performance.\n" |
| 1894 | "\n" |
| 1895 | "// Helper function to convert N-bit boolean subfields into all-0 or all-1.\n" |
| 1896 | "// E.g. __bool2mask(0x01000100,8) -> 0xff00ff00\n" |
| 1897 | "// __bool2mask(0x00010000,16) -> 0xffff0000\n" |
| 1898 | "__DEVICE__ unsigned int __bool2mask(unsigned int __a, int shift) {\n" |
| 1899 | " return (__a << shift) - __a;\n" |
| 1900 | "}\n" |
| 1901 | "__DEVICE__ unsigned int __vabs2(unsigned int __a) {\n" |
| 1902 | " unsigned int r;\n" |
| 1903 | " asm(\"vabsdiff2.s32.s32.s32 %0,%1,%2,%3;\"\n" |
| 1904 | " : \"=r\"(r)\n" |
| 1905 | " : \"r\"(__a), \"r\"(0), \"r\"(0));\n" |
| 1906 | " return r;\n" |
| 1907 | "}\n" |
| 1908 | "__DEVICE__ unsigned int __vabs4(unsigned int __a) {\n" |
| 1909 | " unsigned int r;\n" |
| 1910 | " asm(\"vabsdiff4.s32.s32.s32 %0,%1,%2,%3;\"\n" |
| 1911 | " : \"=r\"(r)\n" |
| 1912 | " : \"r\"(__a), \"r\"(0), \"r\"(0));\n" |
| 1913 | " return r;\n" |
| 1914 | "}\n" |
| 1915 | "__DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) {\n" |
| 1916 | " unsigned int r;\n" |
| 1917 | " asm(\"vabsdiff2.s32.s32.s32 %0,%1,%2,%3;\"\n" |
| 1918 | " : \"=r\"(r)\n" |
| 1919 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 1920 | " return r;\n" |
| 1921 | "}\n" |
| 1922 | "\n" |
| 1923 | "__DEVICE__ unsigned int __vabsdiffs4(unsigned int __a, unsigned int __b) {\n" |
| 1924 | " unsigned int r;\n" |
| 1925 | " asm(\"vabsdiff4.s32.s32.s32 %0,%1,%2,%3;\"\n" |
| 1926 | " : \"=r\"(r)\n" |
| 1927 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 1928 | " return r;\n" |
| 1929 | "}\n" |
| 1930 | "__DEVICE__ unsigned int __vabsdiffu2(unsigned int __a, unsigned int __b) {\n" |
| 1931 | " unsigned int r;\n" |
| 1932 | " asm(\"vabsdiff2.u32.u32.u32 %0,%1,%2,%3;\"\n" |
| 1933 | " : \"=r\"(r)\n" |
| 1934 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 1935 | " return r;\n" |
| 1936 | "}\n" |
| 1937 | "__DEVICE__ unsigned int __vabsdiffu4(unsigned int __a, unsigned int __b) {\n" |
| 1938 | " unsigned int r;\n" |
| 1939 | " asm(\"vabsdiff4.u32.u32.u32 %0,%1,%2,%3;\"\n" |
| 1940 | " : \"=r\"(r)\n" |
| 1941 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 1942 | " return r;\n" |
| 1943 | "}\n" |
| 1944 | "__DEVICE__ unsigned int __vabsss2(unsigned int __a) {\n" |
| 1945 | " unsigned int r;\n" |
| 1946 | " asm(\"vabsdiff2.s32.s32.s32.sat %0,%1,%2,%3;\"\n" |
| 1947 | " : \"=r\"(r)\n" |
| 1948 | " : \"r\"(__a), \"r\"(0), \"r\"(0));\n" |
| 1949 | " return r;\n" |
| 1950 | "}\n" |
| 1951 | "__DEVICE__ unsigned int __vabsss4(unsigned int __a) {\n" |
| 1952 | " unsigned int r;\n" |
| 1953 | " asm(\"vabsdiff4.s32.s32.s32.sat %0,%1,%2,%3;\"\n" |
| 1954 | " : \"=r\"(r)\n" |
| 1955 | " : \"r\"(__a), \"r\"(0), \"r\"(0));\n" |
| 1956 | " return r;\n" |
| 1957 | "}\n" |
| 1958 | "__DEVICE__ unsigned int __vadd2(unsigned int __a, unsigned int __b) {\n" |
| 1959 | " unsigned int r;\n" |
| 1960 | " asm(\"vadd2.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 1961 | " return r;\n" |
| 1962 | "}\n" |
| 1963 | "__DEVICE__ unsigned int __vadd4(unsigned int __a, unsigned int __b) {\n" |
| 1964 | " unsigned int r;\n" |
| 1965 | " asm(\"vadd4.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 1966 | " return r;\n" |
| 1967 | "}\n" |
| 1968 | "__DEVICE__ unsigned int __vaddss2(unsigned int __a, unsigned int __b) {\n" |
| 1969 | " unsigned int r;\n" |
| 1970 | " asm(\"vadd2.s32.s32.s32.sat %0,%1,%2,%3;\"\n" |
| 1971 | " : \"=r\"(r)\n" |
| 1972 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 1973 | " return r;\n" |
| 1974 | "}\n" |
| 1975 | "__DEVICE__ unsigned int __vaddss4(unsigned int __a, unsigned int __b) {\n" |
| 1976 | " unsigned int r;\n" |
| 1977 | " asm(\"vadd4.s32.s32.s32.sat %0,%1,%2,%3;\"\n" |
| 1978 | " : \"=r\"(r)\n" |
| 1979 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 1980 | " return r;\n" |
| 1981 | "}\n" |
| 1982 | "__DEVICE__ unsigned int __vaddus2(unsigned int __a, unsigned int __b) {\n" |
| 1983 | " unsigned int r;\n" |
| 1984 | " asm(\"vadd2.u32.u32.u32.sat %0,%1,%2,%3;\"\n" |
| 1985 | " : \"=r\"(r)\n" |
| 1986 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 1987 | " return r;\n" |
| 1988 | "}\n" |
| 1989 | "__DEVICE__ unsigned int __vaddus4(unsigned int __a, unsigned int __b) {\n" |
| 1990 | " unsigned int r;\n" |
| 1991 | " asm(\"vadd4.u32.u32.u32.sat %0,%1,%2,%3;\"\n" |
| 1992 | " : \"=r\"(r)\n" |
| 1993 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 1994 | " return r;\n" |
| 1995 | "}\n" |
| 1996 | "__DEVICE__ unsigned int __vavgs2(unsigned int __a, unsigned int __b) {\n" |
| 1997 | " unsigned int r;\n" |
| 1998 | " asm(\"vavrg2.s32.s32.s32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 1999 | " return r;\n" |
| 2000 | "}\n" |
| 2001 | "__DEVICE__ unsigned int __vavgs4(unsigned int __a, unsigned int __b) {\n" |
| 2002 | " unsigned int r;\n" |
| 2003 | " asm(\"vavrg4.s32.s32.s32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2004 | " return r;\n" |
| 2005 | "}\n" |
| 2006 | "__DEVICE__ unsigned int __vavgu2(unsigned int __a, unsigned int __b) {\n" |
| 2007 | " unsigned int r;\n" |
| 2008 | " asm(\"vavrg2.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2009 | " return r;\n" |
| 2010 | "}\n" |
| 2011 | "__DEVICE__ unsigned int __vavgu4(unsigned int __a, unsigned int __b) {\n" |
| 2012 | " unsigned int r;\n" |
| 2013 | " asm(\"vavrg4.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2014 | " return r;\n" |
| 2015 | "}\n" |
| 2016 | "__DEVICE__ unsigned int __vseteq2(unsigned int __a, unsigned int __b) {\n" |
| 2017 | " unsigned int r;\n" |
| 2018 | " asm(\"vset2.u32.u32.eq %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2019 | " return r;\n" |
| 2020 | "}\n" |
| 2021 | "__DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) {\n" |
| 2022 | " return __bool2mask(__vseteq2(__a, __b), 16);\n" |
| 2023 | "}\n" |
| 2024 | "__DEVICE__ unsigned int __vseteq4(unsigned int __a, unsigned int __b) {\n" |
| 2025 | " unsigned int r;\n" |
| 2026 | " asm(\"vset4.u32.u32.eq %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2027 | " return r;\n" |
| 2028 | "}\n" |
| 2029 | "__DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) {\n" |
| 2030 | " return __bool2mask(__vseteq4(__a, __b), 8);\n" |
| 2031 | "}\n" |
| 2032 | "__DEVICE__ unsigned int __vsetges2(unsigned int __a, unsigned int __b) {\n" |
| 2033 | " unsigned int r;\n" |
| 2034 | " asm(\"vset2.s32.s32.ge %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2035 | " return r;\n" |
| 2036 | "}\n" |
| 2037 | "__DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) {\n" |
| 2038 | " return __bool2mask(__vsetges2(__a, __b), 16);\n" |
| 2039 | "}\n" |
| 2040 | "__DEVICE__ unsigned int __vsetges4(unsigned int __a, unsigned int __b) {\n" |
| 2041 | " unsigned int r;\n" |
| 2042 | " asm(\"vset4.s32.s32.ge %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2043 | " return r;\n" |
| 2044 | "}\n" |
| 2045 | "__DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) {\n" |
| 2046 | " return __bool2mask(__vsetges4(__a, __b), 8);\n" |
| 2047 | "}\n" |
| 2048 | "__DEVICE__ unsigned int __vsetgeu2(unsigned int __a, unsigned int __b) {\n" |
| 2049 | " unsigned int r;\n" |
| 2050 | " asm(\"vset2.u32.u32.ge %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2051 | " return r;\n" |
| 2052 | "}\n" |
| 2053 | "__DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) {\n" |
| 2054 | " return __bool2mask(__vsetgeu2(__a, __b), 16);\n" |
| 2055 | "}\n" |
| 2056 | "__DEVICE__ unsigned int __vsetgeu4(unsigned int __a, unsigned int __b) {\n" |
| 2057 | " unsigned int r;\n" |
| 2058 | " asm(\"vset4.u32.u32.ge %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2059 | " return r;\n" |
| 2060 | "}\n" |
| 2061 | "__DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) {\n" |
| 2062 | " return __bool2mask(__vsetgeu4(__a, __b), 8);\n" |
| 2063 | "}\n" |
| 2064 | "__DEVICE__ unsigned int __vsetgts2(unsigned int __a, unsigned int __b) {\n" |
| 2065 | " unsigned int r;\n" |
| 2066 | " asm(\"vset2.s32.s32.gt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2067 | " return r;\n" |
| 2068 | "}\n" |
| 2069 | "__DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) {\n" |
| 2070 | " return __bool2mask(__vsetgts2(__a, __b), 16);\n" |
| 2071 | "}\n" |
| 2072 | "__DEVICE__ unsigned int __vsetgts4(unsigned int __a, unsigned int __b) {\n" |
| 2073 | " unsigned int r;\n" |
| 2074 | " asm(\"vset4.s32.s32.gt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2075 | " return r;\n" |
| 2076 | "}\n" |
| 2077 | "__DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) {\n" |
| 2078 | " return __bool2mask(__vsetgts4(__a, __b), 8);\n" |
| 2079 | "}\n" |
| 2080 | "__DEVICE__ unsigned int __vsetgtu2(unsigned int __a, unsigned int __b) {\n" |
| 2081 | " unsigned int r;\n" |
| 2082 | " asm(\"vset2.u32.u32.gt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2083 | " return r;\n" |
| 2084 | "}\n" |
| 2085 | "__DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) {\n" |
| 2086 | " return __bool2mask(__vsetgtu2(__a, __b), 16);\n" |
| 2087 | "}\n" |
| 2088 | "__DEVICE__ unsigned int __vsetgtu4(unsigned int __a, unsigned int __b) {\n" |
| 2089 | " unsigned int r;\n" |
| 2090 | " asm(\"vset4.u32.u32.gt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2091 | " return r;\n" |
| 2092 | "}\n" |
| 2093 | "__DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) {\n" |
| 2094 | " return __bool2mask(__vsetgtu4(__a, __b), 8);\n" |
| 2095 | "}\n" |
| 2096 | "__DEVICE__ unsigned int __vsetles2(unsigned int __a, unsigned int __b) {\n" |
| 2097 | " unsigned int r;\n" |
| 2098 | " asm(\"vset2.s32.s32.le %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2099 | " return r;\n" |
| 2100 | "}\n" |
| 2101 | "__DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) {\n" |
| 2102 | " return __bool2mask(__vsetles2(__a, __b), 16);\n" |
| 2103 | "}\n" |
| 2104 | "__DEVICE__ unsigned int __vsetles4(unsigned int __a, unsigned int __b) {\n" |
| 2105 | " unsigned int r;\n" |
| 2106 | " asm(\"vset4.s32.s32.le %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2107 | " return r;\n" |
| 2108 | "}\n" |
| 2109 | "__DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) {\n" |
| 2110 | " return __bool2mask(__vsetles4(__a, __b), 8);\n" |
| 2111 | "}\n" |
| 2112 | "__DEVICE__ unsigned int __vsetleu2(unsigned int __a, unsigned int __b) {\n" |
| 2113 | " unsigned int r;\n" |
| 2114 | " asm(\"vset2.u32.u32.le %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2115 | " return r;\n" |
| 2116 | "}\n" |
| 2117 | "__DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) {\n" |
| 2118 | " return __bool2mask(__vsetleu2(__a, __b), 16);\n" |
| 2119 | "}\n" |
| 2120 | "__DEVICE__ unsigned int __vsetleu4(unsigned int __a, unsigned int __b) {\n" |
| 2121 | " unsigned int r;\n" |
| 2122 | " asm(\"vset4.u32.u32.le %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2123 | " return r;\n" |
| 2124 | "}\n" |
| 2125 | "__DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) {\n" |
| 2126 | " return __bool2mask(__vsetleu4(__a, __b), 8);\n" |
| 2127 | "}\n" |
| 2128 | "__DEVICE__ unsigned int __vsetlts2(unsigned int __a, unsigned int __b) {\n" |
| 2129 | " unsigned int r;\n" |
| 2130 | " asm(\"vset2.s32.s32.lt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2131 | " return r;\n" |
| 2132 | "}\n" |
| 2133 | "__DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) {\n" |
| 2134 | " return __bool2mask(__vsetlts2(__a, __b), 16);\n" |
| 2135 | "}\n" |
| 2136 | "__DEVICE__ unsigned int __vsetlts4(unsigned int __a, unsigned int __b) {\n" |
| 2137 | " unsigned int r;\n" |
| 2138 | " asm(\"vset4.s32.s32.lt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2139 | " return r;\n" |
| 2140 | "}\n" |
| 2141 | "__DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) {\n" |
| 2142 | " return __bool2mask(__vsetlts4(__a, __b), 8);\n" |
| 2143 | "}\n" |
| 2144 | "__DEVICE__ unsigned int __vsetltu2(unsigned int __a, unsigned int __b) {\n" |
| 2145 | " unsigned int r;\n" |
| 2146 | " asm(\"vset2.u32.u32.lt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2147 | " return r;\n" |
| 2148 | "}\n" |
| 2149 | "__DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) {\n" |
| 2150 | " return __bool2mask(__vsetltu2(__a, __b), 16);\n" |
| 2151 | "}\n" |
| 2152 | "__DEVICE__ unsigned int __vsetltu4(unsigned int __a, unsigned int __b) {\n" |
| 2153 | " unsigned int r;\n" |
| 2154 | " asm(\"vset4.u32.u32.lt %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2155 | " return r;\n" |
| 2156 | "}\n" |
| 2157 | "__DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) {\n" |
| 2158 | " return __bool2mask(__vsetltu4(__a, __b), 8);\n" |
| 2159 | "}\n" |
| 2160 | "__DEVICE__ unsigned int __vsetne2(unsigned int __a, unsigned int __b) {\n" |
| 2161 | " unsigned int r;\n" |
| 2162 | " asm(\"vset2.u32.u32.ne %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2163 | " return r;\n" |
| 2164 | "}\n" |
| 2165 | "__DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) {\n" |
| 2166 | " return __bool2mask(__vsetne2(__a, __b), 16);\n" |
| 2167 | "}\n" |
| 2168 | "__DEVICE__ unsigned int __vsetne4(unsigned int __a, unsigned int __b) {\n" |
| 2169 | " unsigned int r;\n" |
| 2170 | " asm(\"vset4.u32.u32.ne %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2171 | " return r;\n" |
| 2172 | "}\n" |
| 2173 | "__DEVICE__ unsigned int __vcmpne4(unsigned int __a, unsigned int __b) {\n" |
| 2174 | " return __bool2mask(__vsetne4(__a, __b), 8);\n" |
| 2175 | "}\n" |
| 2176 | "\n" |
| 2177 | "// Based on ITEM 23 in AIM-239: http://dspace.mit.edu/handle/1721.1/6086\n" |
| 2178 | "// (a & b) + (a | b) = a + b = (a ^ b) + 2 * (a & b) =>\n" |
| 2179 | "// (a + b) / 2 = ((a ^ b) >> 1) + (a & b)\n" |
| 2180 | "// To operate on multiple sub-elements we need to make sure to mask out bits\n" |
| 2181 | "// that crossed over into adjacent elements during the shift.\n" |
| 2182 | "__DEVICE__ unsigned int __vhaddu2(unsigned int __a, unsigned int __b) {\n" |
| 2183 | " return (((__a ^ __b) >> 1) & ~0x80008000u) + (__a & __b);\n" |
| 2184 | "}\n" |
| 2185 | "__DEVICE__ unsigned int __vhaddu4(unsigned int __a, unsigned int __b) {\n" |
| 2186 | " return (((__a ^ __b) >> 1) & ~0x80808080u) + (__a & __b);\n" |
| 2187 | "}\n" |
| 2188 | "\n" |
| 2189 | "__DEVICE__ unsigned int __vmaxs2(unsigned int __a, unsigned int __b) {\n" |
| 2190 | " unsigned int r;\n" |
| 2191 | " if ((__a & 0x8000) && (__b & 0x8000)) {\n" |
| 2192 | " // Work around a bug in ptxas which produces invalid result if low element\n" |
| 2193 | " // is negative.\n" |
| 2194 | " unsigned mask = __vcmpgts2(__a, __b);\n" |
| 2195 | " r = (__a & mask) | (__b & ~mask);\n" |
| 2196 | " } else {\n" |
| 2197 | " asm(\"vmax2.s32.s32.s32 %0,%1,%2,%3;\"\n" |
| 2198 | " : \"=r\"(r)\n" |
| 2199 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2200 | " }\n" |
| 2201 | " return r;\n" |
| 2202 | "}\n" |
| 2203 | "__DEVICE__ unsigned int __vmaxs4(unsigned int __a, unsigned int __b) {\n" |
| 2204 | " unsigned int r;\n" |
| 2205 | " asm(\"vmax4.s32.s32.s32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2206 | " return r;\n" |
| 2207 | "}\n" |
| 2208 | "__DEVICE__ unsigned int __vmaxu2(unsigned int __a, unsigned int __b) {\n" |
| 2209 | " unsigned int r;\n" |
| 2210 | " asm(\"vmax2.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2211 | " return r;\n" |
| 2212 | "}\n" |
| 2213 | "__DEVICE__ unsigned int __vmaxu4(unsigned int __a, unsigned int __b) {\n" |
| 2214 | " unsigned int r;\n" |
| 2215 | " asm(\"vmax4.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2216 | " return r;\n" |
| 2217 | "}\n" |
| 2218 | "__DEVICE__ unsigned int __vmins2(unsigned int __a, unsigned int __b) {\n" |
| 2219 | " unsigned int r;\n" |
| 2220 | " asm(\"vmin2.s32.s32.s32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2221 | " return r;\n" |
| 2222 | "}\n" |
| 2223 | "__DEVICE__ unsigned int __vmins4(unsigned int __a, unsigned int __b) {\n" |
| 2224 | " unsigned int r;\n" |
| 2225 | " asm(\"vmin4.s32.s32.s32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2226 | " return r;\n" |
| 2227 | "}\n" |
| 2228 | "__DEVICE__ unsigned int __vminu2(unsigned int __a, unsigned int __b) {\n" |
| 2229 | " unsigned int r;\n" |
| 2230 | " asm(\"vmin2.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2231 | " return r;\n" |
| 2232 | "}\n" |
| 2233 | "__DEVICE__ unsigned int __vminu4(unsigned int __a, unsigned int __b) {\n" |
| 2234 | " unsigned int r;\n" |
| 2235 | " asm(\"vmin4.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2236 | " return r;\n" |
| 2237 | "}\n" |
| 2238 | "__DEVICE__ unsigned int __vsads2(unsigned int __a, unsigned int __b) {\n" |
| 2239 | " unsigned int r;\n" |
| 2240 | " asm(\"vabsdiff2.s32.s32.s32.add %0,%1,%2,%3;\"\n" |
| 2241 | " : \"=r\"(r)\n" |
| 2242 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2243 | " return r;\n" |
| 2244 | "}\n" |
| 2245 | "__DEVICE__ unsigned int __vsads4(unsigned int __a, unsigned int __b) {\n" |
| 2246 | " unsigned int r;\n" |
| 2247 | " asm(\"vabsdiff4.s32.s32.s32.add %0,%1,%2,%3;\"\n" |
| 2248 | " : \"=r\"(r)\n" |
| 2249 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2250 | " return r;\n" |
| 2251 | "}\n" |
| 2252 | "__DEVICE__ unsigned int __vsadu2(unsigned int __a, unsigned int __b) {\n" |
| 2253 | " unsigned int r;\n" |
| 2254 | " asm(\"vabsdiff2.u32.u32.u32.add %0,%1,%2,%3;\"\n" |
| 2255 | " : \"=r\"(r)\n" |
| 2256 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2257 | " return r;\n" |
| 2258 | "}\n" |
| 2259 | "__DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b) {\n" |
| 2260 | " unsigned int r;\n" |
| 2261 | " asm(\"vabsdiff4.u32.u32.u32.add %0,%1,%2,%3;\"\n" |
| 2262 | " : \"=r\"(r)\n" |
| 2263 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2264 | " return r;\n" |
| 2265 | "}\n" |
| 2266 | "\n" |
| 2267 | "__DEVICE__ unsigned int __vsub2(unsigned int __a, unsigned int __b) {\n" |
| 2268 | " unsigned int r;\n" |
| 2269 | " asm(\"vsub2.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2270 | " return r;\n" |
| 2271 | "}\n" |
| 2272 | "__DEVICE__ unsigned int __vneg2(unsigned int __a) { return __vsub2(0, __a); }\n" |
| 2273 | "\n" |
| 2274 | "__DEVICE__ unsigned int __vsub4(unsigned int __a, unsigned int __b) {\n" |
| 2275 | " unsigned int r;\n" |
| 2276 | " asm(\"vsub4.u32.u32.u32 %0,%1,%2,%3;\" : \"=r\"(r) : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2277 | " return r;\n" |
| 2278 | "}\n" |
| 2279 | "__DEVICE__ unsigned int __vneg4(unsigned int __a) { return __vsub4(0, __a); }\n" |
| 2280 | "__DEVICE__ unsigned int __vsubss2(unsigned int __a, unsigned int __b) {\n" |
| 2281 | " unsigned int r;\n" |
| 2282 | " asm(\"vsub2.s32.s32.s32.sat %0,%1,%2,%3;\"\n" |
| 2283 | " : \"=r\"(r)\n" |
| 2284 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2285 | " return r;\n" |
| 2286 | "}\n" |
| 2287 | "__DEVICE__ unsigned int __vnegss2(unsigned int __a) {\n" |
| 2288 | " return __vsubss2(0, __a);\n" |
| 2289 | "}\n" |
| 2290 | "__DEVICE__ unsigned int __vsubss4(unsigned int __a, unsigned int __b) {\n" |
| 2291 | " unsigned int r;\n" |
| 2292 | " asm(\"vsub4.s32.s32.s32.sat %0,%1,%2,%3;\"\n" |
| 2293 | " : \"=r\"(r)\n" |
| 2294 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2295 | " return r;\n" |
| 2296 | "}\n" |
| 2297 | "__DEVICE__ unsigned int __vnegss4(unsigned int __a) {\n" |
| 2298 | " return __vsubss4(0, __a);\n" |
| 2299 | "}\n" |
| 2300 | "__DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b) {\n" |
| 2301 | " unsigned int r;\n" |
| 2302 | " asm(\"vsub2.u32.u32.u32.sat %0,%1,%2,%3;\"\n" |
| 2303 | " : \"=r\"(r)\n" |
| 2304 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2305 | " return r;\n" |
| 2306 | "}\n" |
| 2307 | "__DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) {\n" |
| 2308 | " unsigned int r;\n" |
| 2309 | " asm(\"vsub4.u32.u32.u32.sat %0,%1,%2,%3;\"\n" |
| 2310 | " : \"=r\"(r)\n" |
| 2311 | " : \"r\"(__a), \"r\"(__b), \"r\"(0));\n" |
| 2312 | " return r;\n" |
| 2313 | "}\n" |
| 2314 | "#endif // CUDA_VERSION >= 9020\n" |
| 2315 | "__DEVICE__ int abs(int __a) { return __nv_abs(__a); }\n" |
| 2316 | "__DEVICE__ double acos(double __a) { return __nv_acos(__a); }\n" |
| 2317 | "__DEVICE__ float acosf(float __a) { return __nv_acosf(__a); }\n" |
| 2318 | "__DEVICE__ double acosh(double __a) { return __nv_acosh(__a); }\n" |
| 2319 | "__DEVICE__ float acoshf(float __a) { return __nv_acoshf(__a); }\n" |
| 2320 | "__DEVICE__ double asin(double __a) { return __nv_asin(__a); }\n" |
| 2321 | "__DEVICE__ float asinf(float __a) { return __nv_asinf(__a); }\n" |
| 2322 | "__DEVICE__ double asinh(double __a) { return __nv_asinh(__a); }\n" |
| 2323 | "__DEVICE__ float asinhf(float __a) { return __nv_asinhf(__a); }\n" |
| 2324 | "__DEVICE__ double atan(double __a) { return __nv_atan(__a); }\n" |
| 2325 | "__DEVICE__ double atan2(double __a, double __b) { return __nv_atan2(__a, __b); }\n" |
| 2326 | "__DEVICE__ float atan2f(float __a, float __b) { return __nv_atan2f(__a, __b); }\n" |
| 2327 | "__DEVICE__ float atanf(float __a) { return __nv_atanf(__a); }\n" |
| 2328 | "__DEVICE__ double atanh(double __a) { return __nv_atanh(__a); }\n" |
| 2329 | "__DEVICE__ float atanhf(float __a) { return __nv_atanhf(__a); }\n" |
| 2330 | "__DEVICE__ double cbrt(double __a) { return __nv_cbrt(__a); }\n" |
| 2331 | "__DEVICE__ float cbrtf(float __a) { return __nv_cbrtf(__a); }\n" |
| 2332 | "__DEVICE__ double ceil(double __a) { return __nv_ceil(__a); }\n" |
| 2333 | "__DEVICE__ float ceilf(float __a) { return __nv_ceilf(__a); }\n" |
| 2334 | "__DEVICE__ int clock() { return __nvvm_read_ptx_sreg_clock(); }\n" |
| 2335 | "__DEVICE__ long long clock64() { return __nvvm_read_ptx_sreg_clock64(); }\n" |
| 2336 | "__DEVICE__ double copysign(double __a, double __b) {\n" |
| 2337 | " return __nv_copysign(__a, __b);\n" |
| 2338 | "}\n" |
| 2339 | "__DEVICE__ float copysignf(float __a, float __b) {\n" |
| 2340 | " return __nv_copysignf(__a, __b);\n" |
| 2341 | "}\n" |
| 2342 | "__DEVICE__ double cos(double __a) { return __nv_cos(__a); }\n" |
| 2343 | "__DEVICE__ float cosf(float __a) {\n" |
| 2344 | " return __FAST_OR_SLOW(__nv_fast_cosf, __nv_cosf)(__a);\n" |
| 2345 | "}\n" |
| 2346 | "__DEVICE__ double cosh(double __a) { return __nv_cosh(__a); }\n" |
| 2347 | "__DEVICE__ float coshf(float __a) { return __nv_coshf(__a); }\n" |
| 2348 | "__DEVICE__ double cospi(double __a) { return __nv_cospi(__a); }\n" |
| 2349 | "__DEVICE__ float cospif(float __a) { return __nv_cospif(__a); }\n" |
| 2350 | "__DEVICE__ double cyl_bessel_i0(double __a) { return __nv_cyl_bessel_i0(__a); }\n" |
| 2351 | "__DEVICE__ float cyl_bessel_i0f(float __a) { return __nv_cyl_bessel_i0f(__a); }\n" |
| 2352 | "__DEVICE__ double cyl_bessel_i1(double __a) { return __nv_cyl_bessel_i1(__a); }\n" |
| 2353 | "__DEVICE__ float cyl_bessel_i1f(float __a) { return __nv_cyl_bessel_i1f(__a); }\n" |
| 2354 | "__DEVICE__ double erf(double __a) { return __nv_erf(__a); }\n" |
| 2355 | "__DEVICE__ double erfc(double __a) { return __nv_erfc(__a); }\n" |
| 2356 | "__DEVICE__ float erfcf(float __a) { return __nv_erfcf(__a); }\n" |
| 2357 | "__DEVICE__ double erfcinv(double __a) { return __nv_erfcinv(__a); }\n" |
| 2358 | "__DEVICE__ float erfcinvf(float __a) { return __nv_erfcinvf(__a); }\n" |
| 2359 | "__DEVICE__ double erfcx(double __a) { return __nv_erfcx(__a); }\n" |
| 2360 | "__DEVICE__ float erfcxf(float __a) { return __nv_erfcxf(__a); }\n" |
| 2361 | "__DEVICE__ float erff(float __a) { return __nv_erff(__a); }\n" |
| 2362 | "__DEVICE__ double erfinv(double __a) { return __nv_erfinv(__a); }\n" |
| 2363 | "__DEVICE__ float erfinvf(float __a) { return __nv_erfinvf(__a); }\n" |
| 2364 | "__DEVICE__ double exp(double __a) { return __nv_exp(__a); }\n" |
| 2365 | "__DEVICE__ double exp10(double __a) { return __nv_exp10(__a); }\n" |
| 2366 | "__DEVICE__ float exp10f(float __a) { return __nv_exp10f(__a); }\n" |
| 2367 | "__DEVICE__ double exp2(double __a) { return __nv_exp2(__a); }\n" |
| 2368 | "__DEVICE__ float exp2f(float __a) { return __nv_exp2f(__a); }\n" |
| 2369 | "__DEVICE__ float expf(float __a) { return __nv_expf(__a); }\n" |
| 2370 | "__DEVICE__ double expm1(double __a) { return __nv_expm1(__a); }\n" |
| 2371 | "__DEVICE__ float expm1f(float __a) { return __nv_expm1f(__a); }\n" |
| 2372 | "__DEVICE__ double fabs(double __a) { return __nv_fabs(__a); }\n" |
| 2373 | "__DEVICE__ float fabsf(float __a) { return __nv_fabsf(__a); }\n" |
| 2374 | "__DEVICE__ double fdim(double __a, double __b) { return __nv_fdim(__a, __b); }\n" |
| 2375 | "__DEVICE__ float fdimf(float __a, float __b) { return __nv_fdimf(__a, __b); }\n" |
| 2376 | "__DEVICE__ double fdivide(double __a, double __b) { return __a / __b; }\n" |
| 2377 | "__DEVICE__ float fdividef(float __a, float __b) {\n" |
| 2378 | "#if __FAST_MATH__ && !__CUDA_PREC_DIV\n" |
| 2379 | " return __nv_fast_fdividef(__a, __b);\n" |
| 2380 | "#else\n" |
| 2381 | " return __a / __b;\n" |
| 2382 | "#endif\n" |
| 2383 | "}\n" |
| 2384 | "__DEVICE__ double floor(double __f) { return __nv_floor(__f); }\n" |
| 2385 | "__DEVICE__ float floorf(float __f) { return __nv_floorf(__f); }\n" |
| 2386 | "__DEVICE__ double fma(double __a, double __b, double __c) {\n" |
| 2387 | " return __nv_fma(__a, __b, __c);\n" |
| 2388 | "}\n" |
| 2389 | "__DEVICE__ float fmaf(float __a, float __b, float __c) {\n" |
| 2390 | " return __nv_fmaf(__a, __b, __c);\n" |
| 2391 | "}\n" |
| 2392 | "__DEVICE__ double fmax(double __a, double __b) { return __nv_fmax(__a, __b); }\n" |
| 2393 | "__DEVICE__ float fmaxf(float __a, float __b) { return __nv_fmaxf(__a, __b); }\n" |
| 2394 | "__DEVICE__ double fmin(double __a, double __b) { return __nv_fmin(__a, __b); }\n" |
| 2395 | "__DEVICE__ float fminf(float __a, float __b) { return __nv_fminf(__a, __b); }\n" |
| 2396 | "__DEVICE__ double fmod(double __a, double __b) { return __nv_fmod(__a, __b); }\n" |
| 2397 | "__DEVICE__ float fmodf(float __a, float __b) { return __nv_fmodf(__a, __b); }\n" |
| 2398 | "__DEVICE__ double frexp(double __a, int *__b) { return __nv_frexp(__a, __b); }\n" |
| 2399 | "__DEVICE__ float frexpf(float __a, int *__b) { return __nv_frexpf(__a, __b); }\n" |
| 2400 | "__DEVICE__ double hypot(double __a, double __b) { return __nv_hypot(__a, __b); }\n" |
| 2401 | "__DEVICE__ float hypotf(float __a, float __b) { return __nv_hypotf(__a, __b); }\n" |
| 2402 | "__DEVICE__ int ilogb(double __a) { return __nv_ilogb(__a); }\n" |
| 2403 | "__DEVICE__ int ilogbf(float __a) { return __nv_ilogbf(__a); }\n" |
| 2404 | "__DEVICE__ double j0(double __a) { return __nv_j0(__a); }\n" |
| 2405 | "__DEVICE__ float j0f(float __a) { return __nv_j0f(__a); }\n" |
| 2406 | "__DEVICE__ double j1(double __a) { return __nv_j1(__a); }\n" |
| 2407 | "__DEVICE__ float j1f(float __a) { return __nv_j1f(__a); }\n" |
| 2408 | "__DEVICE__ double jn(int __n, double __a) { return __nv_jn(__n, __a); }\n" |
| 2409 | "__DEVICE__ float jnf(int __n, float __a) { return __nv_jnf(__n, __a); }\n" |
| 2410 | "#if defined(__LP64__)\n" |
| 2411 | "__DEVICE__ long labs(long __a) { return llabs(__a); };\n" |
| 2412 | "#else\n" |
| 2413 | "__DEVICE__ long labs(long __a) { return __nv_abs(__a); };\n" |
| 2414 | "#endif\n" |
| 2415 | "__DEVICE__ double ldexp(double __a, int __b) { return __nv_ldexp(__a, __b); }\n" |
| 2416 | "__DEVICE__ float ldexpf(float __a, int __b) { return __nv_ldexpf(__a, __b); }\n" |
| 2417 | "__DEVICE__ double lgamma(double __a) { return __nv_lgamma(__a); }\n" |
| 2418 | "__DEVICE__ float lgammaf(float __a) { return __nv_lgammaf(__a); }\n" |
| 2419 | "__DEVICE__ long long llabs(long long __a) { return __nv_llabs(__a); }\n" |
| 2420 | "__DEVICE__ long long llmax(long long __a, long long __b) {\n" |
| 2421 | " return __nv_llmax(__a, __b);\n" |
| 2422 | "}\n" |
| 2423 | "__DEVICE__ long long llmin(long long __a, long long __b) {\n" |
| 2424 | " return __nv_llmin(__a, __b);\n" |
| 2425 | "}\n" |
| 2426 | "__DEVICE__ long long llrint(double __a) { return __nv_llrint(__a); }\n" |
| 2427 | "__DEVICE__ long long llrintf(float __a) { return __nv_llrintf(__a); }\n" |
| 2428 | "__DEVICE__ long long llround(double __a) { return __nv_llround(__a); }\n" |
| 2429 | "__DEVICE__ long long llroundf(float __a) { return __nv_llroundf(__a); }\n" |
| 2430 | "__DEVICE__ double log(double __a) { return __nv_log(__a); }\n" |
| 2431 | "__DEVICE__ double log10(double __a) { return __nv_log10(__a); }\n" |
| 2432 | "__DEVICE__ float log10f(float __a) { return __nv_log10f(__a); }\n" |
| 2433 | "__DEVICE__ double log1p(double __a) { return __nv_log1p(__a); }\n" |
| 2434 | "__DEVICE__ float log1pf(float __a) { return __nv_log1pf(__a); }\n" |
| 2435 | "__DEVICE__ double log2(double __a) { return __nv_log2(__a); }\n" |
| 2436 | "__DEVICE__ float log2f(float __a) {\n" |
| 2437 | " return __FAST_OR_SLOW(__nv_fast_log2f, __nv_log2f)(__a);\n" |
| 2438 | "}\n" |
| 2439 | "__DEVICE__ double logb(double __a) { return __nv_logb(__a); }\n" |
| 2440 | "__DEVICE__ float logbf(float __a) { return __nv_logbf(__a); }\n" |
| 2441 | "__DEVICE__ float logf(float __a) {\n" |
| 2442 | " return __FAST_OR_SLOW(__nv_fast_logf, __nv_logf)(__a);\n" |
| 2443 | "}\n" |
| 2444 | "#if defined(__LP64__)\n" |
| 2445 | "__DEVICE__ long lrint(double __a) { return llrint(__a); }\n" |
| 2446 | "__DEVICE__ long lrintf(float __a) { return __float2ll_rn(__a); }\n" |
| 2447 | "__DEVICE__ long lround(double __a) { return llround(__a); }\n" |
| 2448 | "__DEVICE__ long lroundf(float __a) { return llroundf(__a); }\n" |
| 2449 | "#else\n" |
| 2450 | "__DEVICE__ long lrint(double __a) { return (long)rint(__a); }\n" |
| 2451 | "__DEVICE__ long lrintf(float __a) { return __float2int_rn(__a); }\n" |
| 2452 | "__DEVICE__ long lround(double __a) { return round(__a); }\n" |
| 2453 | "__DEVICE__ long lroundf(float __a) { return roundf(__a); }\n" |
| 2454 | "#endif\n" |
| 2455 | "__DEVICE__ int max(int __a, int __b) { return __nv_max(__a, __b); }\n" |
| 2456 | "__DEVICE__ void *memcpy(void *__a, const void *__b, size_t __c) {\n" |
| 2457 | " return __builtin_memcpy(__a, __b, __c);\n" |
| 2458 | "}\n" |
| 2459 | "__DEVICE__ void *memset(void *__a, int __b, size_t __c) {\n" |
| 2460 | " return __builtin_memset(__a, __b, __c);\n" |
| 2461 | "}\n" |
| 2462 | "__DEVICE__ int min(int __a, int __b) { return __nv_min(__a, __b); }\n" |
| 2463 | "__DEVICE__ double modf(double __a, double *__b) { return __nv_modf(__a, __b); }\n" |
| 2464 | "__DEVICE__ float modff(float __a, float *__b) { return __nv_modff(__a, __b); }\n" |
| 2465 | "__DEVICE__ double nearbyint(double __a) { return __nv_nearbyint(__a); }\n" |
| 2466 | "__DEVICE__ float nearbyintf(float __a) { return __nv_nearbyintf(__a); }\n" |
| 2467 | "__DEVICE__ double nextafter(double __a, double __b) {\n" |
| 2468 | " return __nv_nextafter(__a, __b);\n" |
| 2469 | "}\n" |
| 2470 | "__DEVICE__ float nextafterf(float __a, float __b) {\n" |
| 2471 | " return __nv_nextafterf(__a, __b);\n" |
| 2472 | "}\n" |
| 2473 | "__DEVICE__ double norm(int __dim, const double *__t) {\n" |
| 2474 | " return __nv_norm(__dim, __t);\n" |
| 2475 | "}\n" |
| 2476 | "__DEVICE__ double norm3d(double __a, double __b, double __c) {\n" |
| 2477 | " return __nv_norm3d(__a, __b, __c);\n" |
| 2478 | "}\n" |
| 2479 | "__DEVICE__ float norm3df(float __a, float __b, float __c) {\n" |
| 2480 | " return __nv_norm3df(__a, __b, __c);\n" |
| 2481 | "}\n" |
| 2482 | "__DEVICE__ double norm4d(double __a, double __b, double __c, double __d) {\n" |
| 2483 | " return __nv_norm4d(__a, __b, __c, __d);\n" |
| 2484 | "}\n" |
| 2485 | "__DEVICE__ float norm4df(float __a, float __b, float __c, float __d) {\n" |
| 2486 | " return __nv_norm4df(__a, __b, __c, __d);\n" |
| 2487 | "}\n" |
| 2488 | "__DEVICE__ double normcdf(double __a) { return __nv_normcdf(__a); }\n" |
| 2489 | "__DEVICE__ float normcdff(float __a) { return __nv_normcdff(__a); }\n" |
| 2490 | "__DEVICE__ double normcdfinv(double __a) { return __nv_normcdfinv(__a); }\n" |
| 2491 | "__DEVICE__ float normcdfinvf(float __a) { return __nv_normcdfinvf(__a); }\n" |
| 2492 | "__DEVICE__ float normf(int __dim, const float *__t) {\n" |
| 2493 | " return __nv_normf(__dim, __t);\n" |
| 2494 | "}\n" |
| 2495 | "__DEVICE__ double pow(double __a, double __b) { return __nv_pow(__a, __b); }\n" |
| 2496 | "__DEVICE__ float powf(float __a, float __b) { return __nv_powf(__a, __b); }\n" |
| 2497 | "__DEVICE__ double powi(double __a, int __b) { return __nv_powi(__a, __b); }\n" |
| 2498 | "__DEVICE__ float powif(float __a, int __b) { return __nv_powif(__a, __b); }\n" |
| 2499 | "__DEVICE__ double rcbrt(double __a) { return __nv_rcbrt(__a); }\n" |
| 2500 | "__DEVICE__ float rcbrtf(float __a) { return __nv_rcbrtf(__a); }\n" |
| 2501 | "__DEVICE__ double remainder(double __a, double __b) {\n" |
| 2502 | " return __nv_remainder(__a, __b);\n" |
| 2503 | "}\n" |
| 2504 | "__DEVICE__ float remainderf(float __a, float __b) {\n" |
| 2505 | " return __nv_remainderf(__a, __b);\n" |
| 2506 | "}\n" |
| 2507 | "__DEVICE__ double remquo(double __a, double __b, int *__c) {\n" |
| 2508 | " return __nv_remquo(__a, __b, __c);\n" |
| 2509 | "}\n" |
| 2510 | "__DEVICE__ float remquof(float __a, float __b, int *__c) {\n" |
| 2511 | " return __nv_remquof(__a, __b, __c);\n" |
| 2512 | "}\n" |
| 2513 | "__DEVICE__ double rhypot(double __a, double __b) {\n" |
| 2514 | " return __nv_rhypot(__a, __b);\n" |
| 2515 | "}\n" |
| 2516 | "__DEVICE__ float rhypotf(float __a, float __b) {\n" |
| 2517 | " return __nv_rhypotf(__a, __b);\n" |
| 2518 | "}\n" |
| 2519 | "__DEVICE__ double rint(double __a) { return __nv_rint(__a); }\n" |
| 2520 | "__DEVICE__ float rintf(float __a) { return __nv_rintf(__a); }\n" |
| 2521 | "__DEVICE__ double rnorm(int __a, const double *__b) {\n" |
| 2522 | " return __nv_rnorm(__a, __b);\n" |
| 2523 | "}\n" |
| 2524 | "__DEVICE__ double rnorm3d(double __a, double __b, double __c) {\n" |
| 2525 | " return __nv_rnorm3d(__a, __b, __c);\n" |
| 2526 | "}\n" |
| 2527 | "__DEVICE__ float rnorm3df(float __a, float __b, float __c) {\n" |
| 2528 | " return __nv_rnorm3df(__a, __b, __c);\n" |
| 2529 | "}\n" |
| 2530 | "__DEVICE__ double rnorm4d(double __a, double __b, double __c, double __d) {\n" |
| 2531 | " return __nv_rnorm4d(__a, __b, __c, __d);\n" |
| 2532 | "}\n" |
| 2533 | "__DEVICE__ float rnorm4df(float __a, float __b, float __c, float __d) {\n" |
| 2534 | " return __nv_rnorm4df(__a, __b, __c, __d);\n" |
| 2535 | "}\n" |
| 2536 | "__DEVICE__ float rnormf(int __dim, const float *__t) {\n" |
| 2537 | " return __nv_rnormf(__dim, __t);\n" |
| 2538 | "}\n" |
| 2539 | "__DEVICE__ double round(double __a) { return __nv_round(__a); }\n" |
| 2540 | "__DEVICE__ float roundf(float __a) { return __nv_roundf(__a); }\n" |
| 2541 | "__DEVICE__ double rsqrt(double __a) { return __nv_rsqrt(__a); }\n" |
| 2542 | "__DEVICE__ float rsqrtf(float __a) { return __nv_rsqrtf(__a); }\n" |
| 2543 | "__DEVICE__ double scalbn(double __a, int __b) { return __nv_scalbn(__a, __b); }\n" |
| 2544 | "__DEVICE__ float scalbnf(float __a, int __b) { return __nv_scalbnf(__a, __b); }\n" |
| 2545 | "__DEVICE__ double scalbln(double __a, long __b) {\n" |
| 2546 | " if (__b > INT_MAX)\n" |
| 2547 | " return __a > 0 ? HUGE_VAL : -HUGE_VAL;\n" |
| 2548 | " if (__b < INT_MIN)\n" |
| 2549 | " return __a > 0 ? 0.0 : -0.0;\n" |
| 2550 | " return scalbn(__a, (int)__b);\n" |
| 2551 | "}\n" |
| 2552 | "__DEVICE__ float scalblnf(float __a, long __b) {\n" |
| 2553 | " if (__b > INT_MAX)\n" |
| 2554 | " return __a > 0 ? HUGE_VALF : -HUGE_VALF;\n" |
| 2555 | " if (__b < INT_MIN)\n" |
| 2556 | " return __a > 0 ? 0.f : -0.f;\n" |
| 2557 | " return scalbnf(__a, (int)__b);\n" |
| 2558 | "}\n" |
| 2559 | "__DEVICE__ double sin(double __a) { return __nv_sin(__a); }\n" |
| 2560 | "__DEVICE__ void sincos(double __a, double *__sptr, double *__cptr) {\n" |
| 2561 | " return __nv_sincos(__a, __sptr, __cptr);\n" |
| 2562 | "}\n" |
| 2563 | "__DEVICE__ void sincosf(float __a, float *__sptr, float *__cptr) {\n" |
| 2564 | " return __FAST_OR_SLOW(__nv_fast_sincosf, __nv_sincosf)(__a, __sptr, __cptr);\n" |
| 2565 | "}\n" |
| 2566 | "__DEVICE__ void sincospi(double __a, double *__sptr, double *__cptr) {\n" |
| 2567 | " return __nv_sincospi(__a, __sptr, __cptr);\n" |
| 2568 | "}\n" |
| 2569 | "__DEVICE__ void sincospif(float __a, float *__sptr, float *__cptr) {\n" |
| 2570 | " return __nv_sincospif(__a, __sptr, __cptr);\n" |
| 2571 | "}\n" |
| 2572 | "__DEVICE__ float sinf(float __a) {\n" |
| 2573 | " return __FAST_OR_SLOW(__nv_fast_sinf, __nv_sinf)(__a);\n" |
| 2574 | "}\n" |
| 2575 | "__DEVICE__ double sinh(double __a) { return __nv_sinh(__a); }\n" |
| 2576 | "__DEVICE__ float sinhf(float __a) { return __nv_sinhf(__a); }\n" |
| 2577 | "__DEVICE__ double sinpi(double __a) { return __nv_sinpi(__a); }\n" |
| 2578 | "__DEVICE__ float sinpif(float __a) { return __nv_sinpif(__a); }\n" |
| 2579 | "__DEVICE__ double sqrt(double __a) { return __nv_sqrt(__a); }\n" |
| 2580 | "__DEVICE__ float sqrtf(float __a) { return __nv_sqrtf(__a); }\n" |
| 2581 | "__DEVICE__ double tan(double __a) { return __nv_tan(__a); }\n" |
| 2582 | "__DEVICE__ float tanf(float __a) { return __nv_tanf(__a); }\n" |
| 2583 | "__DEVICE__ double tanh(double __a) { return __nv_tanh(__a); }\n" |
| 2584 | "__DEVICE__ float tanhf(float __a) { return __nv_tanhf(__a); }\n" |
| 2585 | "__DEVICE__ double tgamma(double __a) { return __nv_tgamma(__a); }\n" |
| 2586 | "__DEVICE__ float tgammaf(float __a) { return __nv_tgammaf(__a); }\n" |
| 2587 | "__DEVICE__ double trunc(double __a) { return __nv_trunc(__a); }\n" |
| 2588 | "__DEVICE__ float truncf(float __a) { return __nv_truncf(__a); }\n" |
| 2589 | "__DEVICE__ unsigned long long ullmax(unsigned long long __a,\n" |
| 2590 | " unsigned long long __b) {\n" |
| 2591 | " return __nv_ullmax(__a, __b);\n" |
| 2592 | "}\n" |
| 2593 | "__DEVICE__ unsigned long long ullmin(unsigned long long __a,\n" |
| 2594 | " unsigned long long __b) {\n" |
| 2595 | " return __nv_ullmin(__a, __b);\n" |
| 2596 | "}\n" |
| 2597 | "__DEVICE__ unsigned int umax(unsigned int __a, unsigned int __b) {\n" |
| 2598 | " return __nv_umax(__a, __b);\n" |
| 2599 | "}\n" |
| 2600 | "__DEVICE__ unsigned int umin(unsigned int __a, unsigned int __b) {\n" |
| 2601 | " return __nv_umin(__a, __b);\n" |
| 2602 | "}\n" |
| 2603 | "__DEVICE__ double y0(double __a) { return __nv_y0(__a); }\n" |
| 2604 | "__DEVICE__ float y0f(float __a) { return __nv_y0f(__a); }\n" |
| 2605 | "__DEVICE__ double y1(double __a) { return __nv_y1(__a); }\n" |
| 2606 | "__DEVICE__ float y1f(float __a) { return __nv_y1f(__a); }\n" |
| 2607 | "__DEVICE__ double yn(int __a, double __b) { return __nv_yn(__a, __b); }\n" |
| 2608 | "__DEVICE__ float ynf(int __a, float __b) { return __nv_ynf(__a, __b); }\n" |
| 2609 | "\n" |
| 2610 | "#pragma pop_macro(\"__DEVICE__\")\n" |
| 2611 | "#pragma pop_macro(\"__FAST_OR_SLOW\")\n" |
| 2612 | "#endif // __CLANG_CUDA_DEVICE_FUNCTIONS_H__\n" |
| 2613 | "" } , |
| 2614 | { "/builtins/__clang_cuda_intrinsics.h" , "/*===--- __clang_cuda_intrinsics.h - Device-side CUDA intrinsic wrappers ---===\n" |
| 2615 | " *\n" |
| 2616 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 2617 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 2618 | " * in the Software without restriction, including without limitation the rights\n" |
| 2619 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 2620 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 2621 | " * furnished to do so, subject to the following conditions:\n" |
| 2622 | " *\n" |
| 2623 | " * The above copyright notice and this permission notice shall be included in\n" |
| 2624 | " * all copies or substantial portions of the Software.\n" |
| 2625 | " *\n" |
| 2626 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 2627 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 2628 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 2629 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 2630 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 2631 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 2632 | " * THE SOFTWARE.\n" |
| 2633 | " *\n" |
| 2634 | " *===-----------------------------------------------------------------------===\n" |
| 2635 | " */\n" |
| 2636 | "#ifndef __CLANG_CUDA_INTRINSICS_H__\n" |
| 2637 | "#define __CLANG_CUDA_INTRINSICS_H__\n" |
| 2638 | "#ifndef __CUDA__\n" |
| 2639 | "#error \"This file is for CUDA compilation only.\"\n" |
| 2640 | "#endif\n" |
| 2641 | "\n" |
| 2642 | "// sm_30 intrinsics: __shfl_{up,down,xor}.\n" |
| 2643 | "\n" |
| 2644 | "#define __SM_30_INTRINSICS_H__\n" |
| 2645 | "#define __SM_30_INTRINSICS_HPP__\n" |
| 2646 | "\n" |
| 2647 | "#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300\n" |
| 2648 | "\n" |
| 2649 | "#pragma push_macro(\"__MAKE_SHUFFLES\")\n" |
| 2650 | "#define __MAKE_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, __Mask, \\\n" |
| 2651 | " __Type) \\\n" |
| 2652 | " inline __device__ int __FnName(int __val, __Type __offset, \\\n" |
| 2653 | " int __width = warpSize) { \\\n" |
| 2654 | " return __IntIntrinsic(__val, __offset, \\\n" |
| 2655 | " ((warpSize - __width) << 8) | (__Mask)); \\\n" |
| 2656 | " } \\\n" |
| 2657 | " inline __device__ float __FnName(float __val, __Type __offset, \\\n" |
| 2658 | " int __width = warpSize) { \\\n" |
| 2659 | " return __FloatIntrinsic(__val, __offset, \\\n" |
| 2660 | " ((warpSize - __width) << 8) | (__Mask)); \\\n" |
| 2661 | " } \\\n" |
| 2662 | " inline __device__ unsigned int __FnName(unsigned int __val, __Type __offset, \\\n" |
| 2663 | " int __width = warpSize) { \\\n" |
| 2664 | " return static_cast<unsigned int>( \\\n" |
| 2665 | " ::__FnName(static_cast<int>(__val), __offset, __width)); \\\n" |
| 2666 | " } \\\n" |
| 2667 | "  inline __device__ long long __FnName(long long __val, __Type __offset,       \\\n" |
| 2668 | "                                       int __width = warpSize) {               \\\n" |
| 2669 | "    struct __Bits {                                                            \\\n" |
| 2670 | "      int __a, __b;                                                            \\\n" |
| 2671 | "    };                                                                         \\\n" |
| 2672 | "    _Static_assert(sizeof(__val) == sizeof(__Bits));                           \\\n" |
| 2673 | "    _Static_assert(sizeof(__Bits) == 2 * sizeof(int));                         \\\n" |
| 2674 | "    __Bits __tmp;                                                              \\\n" |
| 2675 | "    memcpy(&__tmp, &__val, sizeof(__val)); /* dst = __tmp, src = __val */      \\\n" |
| 2676 | "    __tmp.__a = ::__FnName(__tmp.__a, __offset, __width);                      \\\n" |
| 2677 | "    __tmp.__b = ::__FnName(__tmp.__b, __offset, __width);                      \\\n" |
| 2678 | "    long long __ret;                                                           \\\n" |
| 2679 | "    memcpy(&__ret, &__tmp, sizeof(__tmp));                                     \\\n" |
| 2680 | "    return __ret;                                                              \\\n" |
| 2681 | "  }                                                                            \\\n" |
| 2682 | " inline __device__ long __FnName(long __val, __Type __offset, \\\n" |
| 2683 | " int __width = warpSize) { \\\n" |
| 2684 | " _Static_assert(sizeof(long) == sizeof(long long) || \\\n" |
| 2685 | " sizeof(long) == sizeof(int)); \\\n" |
| 2686 | " if (sizeof(long) == sizeof(long long)) { \\\n" |
| 2687 | " return static_cast<long>( \\\n" |
| 2688 | " ::__FnName(static_cast<long long>(__val), __offset, __width)); \\\n" |
| 2689 | " } else if (sizeof(long) == sizeof(int)) { \\\n" |
| 2690 | " return static_cast<long>( \\\n" |
| 2691 | " ::__FnName(static_cast<int>(__val), __offset, __width)); \\\n" |
| 2692 | " } \\\n" |
| 2693 | " } \\\n" |
| 2694 | " inline __device__ unsigned long __FnName( \\\n" |
| 2695 | " unsigned long __val, __Type __offset, int __width = warpSize) { \\\n" |
| 2696 | " return static_cast<unsigned long>( \\\n" |
| 2697 | " ::__FnName(static_cast<long>(__val), __offset, __width)); \\\n" |
| 2698 | " } \\\n" |
| 2699 | "  inline __device__ unsigned long long __FnName(                               \\\n" |
| 2700 | "      unsigned long long __val, __Type __offset, int __width = warpSize) {     \\\n" |
| 2701 | "    return static_cast<unsigned long long>(::__FnName(                         \\\n" |
| 2702 | "        static_cast<long long>(__val), __offset, __width)); /* no self-call */ \\\n" |
| 2703 | "  }                                                                            \\\n" |
| 2704 | " inline __device__ double __FnName(double __val, __Type __offset, \\\n" |
| 2705 | " int __width = warpSize) { \\\n" |
| 2706 | " long long __tmp; \\\n" |
| 2707 | " _Static_assert(sizeof(__tmp) == sizeof(__val)); \\\n" |
| 2708 | " memcpy(&__tmp, &__val, sizeof(__val)); \\\n" |
| 2709 | " __tmp = ::__FnName(__tmp, __offset, __width); \\\n" |
| 2710 | " double __ret; \\\n" |
| 2711 | " memcpy(&__ret, &__tmp, sizeof(__ret)); \\\n" |
| 2712 | " return __ret; \\\n" |
| 2713 | " }\n" |
| 2714 | "\n" |
| 2715 | "__MAKE_SHUFFLES(__shfl, __nvvm_shfl_idx_i32, __nvvm_shfl_idx_f32, 0x1f, int);\n" |
| 2716 | "// We use 0 rather than 31 as our mask, because shfl.up applies to lanes >=\n" |
| 2717 | "// maxLane.\n" |
| 2718 | "__MAKE_SHUFFLES(__shfl_up, __nvvm_shfl_up_i32, __nvvm_shfl_up_f32, 0,\n" |
| 2719 | " unsigned int);\n" |
| 2720 | "__MAKE_SHUFFLES(__shfl_down, __nvvm_shfl_down_i32, __nvvm_shfl_down_f32, 0x1f,\n" |
| 2721 | " unsigned int);\n" |
| 2722 | "__MAKE_SHUFFLES(__shfl_xor, __nvvm_shfl_bfly_i32, __nvvm_shfl_bfly_f32, 0x1f,\n" |
| 2723 | " int);\n" |
| 2724 | "#pragma pop_macro(\"__MAKE_SHUFFLES\")\n" |
| 2725 | "\n" |
| 2726 | "#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300\n" |
| 2727 | "\n" |
| 2728 | "#if CUDA_VERSION >= 9000\n" |
| 2729 | "#if (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300)\n" |
| 2730 | "// __shfl_sync_* variants available in CUDA-9\n" |
| 2731 | "#pragma push_macro(\"__MAKE_SYNC_SHUFFLES\")\n" |
| 2732 | "#define __MAKE_SYNC_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, \\\n" |
| 2733 | " __Mask, __Type) \\\n" |
| 2734 | " inline __device__ int __FnName(unsigned int __mask, int __val, \\\n" |
| 2735 | " __Type __offset, int __width = warpSize) { \\\n" |
| 2736 | " return __IntIntrinsic(__mask, __val, __offset, \\\n" |
| 2737 | " ((warpSize - __width) << 8) | (__Mask)); \\\n" |
| 2738 | " } \\\n" |
| 2739 | " inline __device__ float __FnName(unsigned int __mask, float __val, \\\n" |
| 2740 | " __Type __offset, int __width = warpSize) { \\\n" |
| 2741 | " return __FloatIntrinsic(__mask, __val, __offset, \\\n" |
| 2742 | " ((warpSize - __width) << 8) | (__Mask)); \\\n" |
| 2743 | " } \\\n" |
| 2744 | " inline __device__ unsigned int __FnName(unsigned int __mask, \\\n" |
| 2745 | " unsigned int __val, __Type __offset, \\\n" |
| 2746 | " int __width = warpSize) { \\\n" |
| 2747 | " return static_cast<unsigned int>( \\\n" |
| 2748 | " ::__FnName(__mask, static_cast<int>(__val), __offset, __width)); \\\n" |
| 2749 | " } \\\n" |
| 2750 | "  inline __device__ long long __FnName(unsigned int __mask, long long __val,   \\\n" |
| 2751 | "                                       __Type __offset,                        \\\n" |
| 2752 | "                                       int __width = warpSize) {               \\\n" |
| 2753 | "    struct __Bits {                                                            \\\n" |
| 2754 | "      int __a, __b;                                                            \\\n" |
| 2755 | "    };                                                                         \\\n" |
| 2756 | "    _Static_assert(sizeof(__val) == sizeof(__Bits));                           \\\n" |
| 2757 | "    _Static_assert(sizeof(__Bits) == 2 * sizeof(int));                         \\\n" |
| 2758 | "    __Bits __tmp;                                                              \\\n" |
| 2759 | "    memcpy(&__tmp, &__val, sizeof(__val)); /* dst = __tmp, src = __val */      \\\n" |
| 2760 | "    __tmp.__a = ::__FnName(__mask, __tmp.__a, __offset, __width);              \\\n" |
| 2761 | "    __tmp.__b = ::__FnName(__mask, __tmp.__b, __offset, __width);              \\\n" |
| 2762 | "    long long __ret;                                                           \\\n" |
| 2763 | "    memcpy(&__ret, &__tmp, sizeof(__tmp));                                     \\\n" |
| 2764 | "    return __ret;                                                              \\\n" |
| 2765 | "  }                                                                            \\\n" |
| 2766 | "  inline __device__ unsigned long long __FnName(                               \\\n" |
| 2767 | "      unsigned int __mask, unsigned long long __val, __Type __offset,          \\\n" |
| 2768 | "      int __width = warpSize) { /* forwards to long long overload */           \\\n" |
| 2769 | "    return static_cast<unsigned long long>(::__FnName(                         \\\n" |
| 2770 | "        __mask, static_cast<long long>(__val), __offset, __width));            \\\n" |
| 2771 | "  }                                                                            \\\n" |
| 2772 | " inline __device__ long __FnName(unsigned int __mask, long __val, \\\n" |
| 2773 | " __Type __offset, int __width = warpSize) { \\\n" |
| 2774 | " _Static_assert(sizeof(long) == sizeof(long long) || \\\n" |
| 2775 | " sizeof(long) == sizeof(int)); \\\n" |
| 2776 | " if (sizeof(long) == sizeof(long long)) { \\\n" |
| 2777 | " return static_cast<long>(::__FnName( \\\n" |
| 2778 | " __mask, static_cast<long long>(__val), __offset, __width)); \\\n" |
| 2779 | " } else if (sizeof(long) == sizeof(int)) { \\\n" |
| 2780 | " return static_cast<long>( \\\n" |
| 2781 | " ::__FnName(__mask, static_cast<int>(__val), __offset, __width)); \\\n" |
| 2782 | " } \\\n" |
| 2783 | " } \\\n" |
| 2784 | " inline __device__ unsigned long __FnName( \\\n" |
| 2785 | " unsigned int __mask, unsigned long __val, __Type __offset, \\\n" |
| 2786 | " int __width = warpSize) { \\\n" |
| 2787 | " return static_cast<unsigned long>( \\\n" |
| 2788 | " ::__FnName(__mask, static_cast<long>(__val), __offset, __width)); \\\n" |
| 2789 | " } \\\n" |
| 2790 | " inline __device__ double __FnName(unsigned int __mask, double __val, \\\n" |
| 2791 | " __Type __offset, int __width = warpSize) { \\\n" |
| 2792 | " long long __tmp; \\\n" |
| 2793 | " _Static_assert(sizeof(__tmp) == sizeof(__val)); \\\n" |
| 2794 | " memcpy(&__tmp, &__val, sizeof(__val)); \\\n" |
| 2795 | " __tmp = ::__FnName(__mask, __tmp, __offset, __width); \\\n" |
| 2796 | " double __ret; \\\n" |
| 2797 | " memcpy(&__ret, &__tmp, sizeof(__ret)); \\\n" |
| 2798 | " return __ret; \\\n" |
| 2799 | " }\n" |
| 2800 | "__MAKE_SYNC_SHUFFLES(__shfl_sync, __nvvm_shfl_sync_idx_i32,\n" |
| 2801 | " __nvvm_shfl_sync_idx_f32, 0x1f, int);\n" |
| 2802 | "// We use 0 rather than 31 as our mask, because shfl.up applies to lanes >=\n" |
| 2803 | "// maxLane.\n" |
| 2804 | "__MAKE_SYNC_SHUFFLES(__shfl_up_sync, __nvvm_shfl_sync_up_i32,\n" |
| 2805 | " __nvvm_shfl_sync_up_f32, 0, unsigned int);\n" |
| 2806 | "__MAKE_SYNC_SHUFFLES(__shfl_down_sync, __nvvm_shfl_sync_down_i32,\n" |
| 2807 | " __nvvm_shfl_sync_down_f32, 0x1f, unsigned int);\n" |
| 2808 | "__MAKE_SYNC_SHUFFLES(__shfl_xor_sync, __nvvm_shfl_sync_bfly_i32,\n" |
| 2809 | " __nvvm_shfl_sync_bfly_f32, 0x1f, int);\n" |
| 2810 | "#pragma pop_macro(\"__MAKE_SYNC_SHUFFLES\")\n" |
| 2811 | "\n" |
| 2812 | "inline __device__ void __syncwarp(unsigned int mask = 0xffffffff) {\n" |
| 2813 | " return __nvvm_bar_warp_sync(mask);\n" |
| 2814 | "}\n" |
| 2815 | "\n" |
| 2816 | "inline __device__ void __barrier_sync(unsigned int id) {\n" |
| 2817 | " __nvvm_barrier_sync(id);\n" |
| 2818 | "}\n" |
| 2819 | "\n" |
| 2820 | "inline __device__ void __barrier_sync_count(unsigned int id,\n" |
| 2821 | " unsigned int count) {\n" |
| 2822 | " __nvvm_barrier_sync_cnt(id, count);\n" |
| 2823 | "}\n" |
| 2824 | "\n" |
| 2825 | "inline __device__ int __all_sync(unsigned int mask, int pred) {\n" |
| 2826 | " return __nvvm_vote_all_sync(mask, pred);\n" |
| 2827 | "}\n" |
| 2828 | "\n" |
| 2829 | "inline __device__ int __any_sync(unsigned int mask, int pred) {\n" |
| 2830 | " return __nvvm_vote_any_sync(mask, pred);\n" |
| 2831 | "}\n" |
| 2832 | "\n" |
| 2833 | "inline __device__ int __uni_sync(unsigned int mask, int pred) {\n" |
| 2834 | " return __nvvm_vote_uni_sync(mask, pred);\n" |
| 2835 | "}\n" |
| 2836 | "\n" |
| 2837 | "inline __device__ unsigned int __ballot_sync(unsigned int mask, int pred) {\n" |
| 2838 | " return __nvvm_vote_ballot_sync(mask, pred);\n" |
| 2839 | "}\n" |
| 2840 | "\n" |
| 2841 | "inline __device__ unsigned int __activemask() { return __nvvm_vote_ballot(1); }\n" |
| 2842 | "\n" |
| 2843 | "inline __device__ unsigned int __fns(unsigned mask, unsigned base, int offset) {\n" |
| 2844 | " return __nvvm_fns(mask, base, offset);\n" |
| 2845 | "}\n" |
| 2846 | "\n" |
| 2847 | "#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300\n" |
| 2848 | "\n" |
| 2849 | "// Define __match* builtins CUDA-9 headers expect to see.\n" |
| 2850 | "#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700\n" |
| 2851 | "inline __device__ unsigned int __match32_any_sync(unsigned int mask,\n" |
| 2852 | " unsigned int value) {\n" |
| 2853 | " return __nvvm_match_any_sync_i32(mask, value);\n" |
| 2854 | "}\n" |
| 2855 | "\n" |
| 2856 | "inline __device__ unsigned long long\n" |
| 2857 | "__match64_any_sync(unsigned int mask, unsigned long long value) {\n" |
| 2858 | " return __nvvm_match_any_sync_i64(mask, value);\n" |
| 2859 | "}\n" |
| 2860 | "\n" |
| 2861 | "inline __device__ unsigned int\n" |
| 2862 | "__match32_all_sync(unsigned int mask, unsigned int value, int *pred) {\n" |
| 2863 | " return __nvvm_match_all_sync_i32p(mask, value, pred);\n" |
| 2864 | "}\n" |
| 2865 | "\n" |
| 2866 | "inline __device__ unsigned long long\n" |
| 2867 | "__match64_all_sync(unsigned int mask, unsigned long long value, int *pred) {\n" |
| 2868 | " return __nvvm_match_all_sync_i64p(mask, value, pred);\n" |
| 2869 | "}\n" |
| 2870 | "#include \"crt/sm_70_rt.hpp\"\n" |
| 2871 | "\n" |
| 2872 | "#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700\n" |
| 2873 | "#endif // __CUDA_VERSION >= 9000\n" |
| 2874 | "\n" |
| 2875 | "// sm_32 intrinsics: __ldg and __funnelshift_{l,lc,r,rc}.\n" |
| 2876 | "\n" |
| 2877 | "// Prevent the vanilla sm_32 intrinsics header from being included.\n" |
| 2878 | "#define __SM_32_INTRINSICS_H__\n" |
| 2879 | "#define __SM_32_INTRINSICS_HPP__\n" |
| 2880 | "\n" |
| 2881 | "#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320\n" |
| 2882 | "\n" |
| 2883 | "inline __device__ char __ldg(const char *ptr) { return __nvvm_ldg_c(ptr); }\n" |
| 2884 | "inline __device__ short __ldg(const short *ptr) { return __nvvm_ldg_s(ptr); }\n" |
| 2885 | "inline __device__ int __ldg(const int *ptr) { return __nvvm_ldg_i(ptr); }\n" |
| 2886 | "inline __device__ long __ldg(const long *ptr) { return __nvvm_ldg_l(ptr); }\n" |
| 2887 | "inline __device__ long long __ldg(const long long *ptr) {\n" |
| 2888 | " return __nvvm_ldg_ll(ptr);\n" |
| 2889 | "}\n" |
| 2890 | "inline __device__ unsigned char __ldg(const unsigned char *ptr) {\n" |
| 2891 | " return __nvvm_ldg_uc(ptr);\n" |
| 2892 | "}\n" |
| 2893 | "inline __device__ signed char __ldg(const signed char *ptr) {\n" |
| 2894 | " return __nvvm_ldg_uc((const unsigned char *)ptr);\n" |
| 2895 | "}\n" |
| 2896 | "inline __device__ unsigned short __ldg(const unsigned short *ptr) {\n" |
| 2897 | " return __nvvm_ldg_us(ptr);\n" |
| 2898 | "}\n" |
| 2899 | "inline __device__ unsigned int __ldg(const unsigned int *ptr) {\n" |
| 2900 | " return __nvvm_ldg_ui(ptr);\n" |
| 2901 | "}\n" |
| 2902 | "inline __device__ unsigned long __ldg(const unsigned long *ptr) {\n" |
| 2903 | " return __nvvm_ldg_ul(ptr);\n" |
| 2904 | "}\n" |
| 2905 | "inline __device__ unsigned long long __ldg(const unsigned long long *ptr) {\n" |
| 2906 | " return __nvvm_ldg_ull(ptr);\n" |
| 2907 | "}\n" |
| 2908 | "inline __device__ float __ldg(const float *ptr) { return __nvvm_ldg_f(ptr); }\n" |
| 2909 | "inline __device__ double __ldg(const double *ptr) { return __nvvm_ldg_d(ptr); }\n" |
| 2910 | "\n" |
| 2911 | "inline __device__ char2 __ldg(const char2 *ptr) {\n" |
| 2912 | " typedef char c2 __attribute__((ext_vector_type(2)));\n" |
| 2913 | " // We can assume that ptr is aligned at least to char2's alignment, but the\n" |
| 2914 | " // load will assume that ptr is aligned to char2's alignment. This is only\n" |
| 2915 | " // safe if alignof(c2) <= alignof(char2).\n" |
| 2916 | " c2 rv = __nvvm_ldg_c2(reinterpret_cast<const c2 *>(ptr));\n" |
| 2917 | " char2 ret;\n" |
| 2918 | " ret.x = rv[0];\n" |
| 2919 | " ret.y = rv[1];\n" |
| 2920 | " return ret;\n" |
| 2921 | "}\n" |
| 2922 | "inline __device__ char4 __ldg(const char4 *ptr) {\n" |
| 2923 | " typedef char c4 __attribute__((ext_vector_type(4)));\n" |
| 2924 | " c4 rv = __nvvm_ldg_c4(reinterpret_cast<const c4 *>(ptr));\n" |
| 2925 | " char4 ret;\n" |
| 2926 | " ret.x = rv[0];\n" |
| 2927 | " ret.y = rv[1];\n" |
| 2928 | " ret.z = rv[2];\n" |
| 2929 | " ret.w = rv[3];\n" |
| 2930 | " return ret;\n" |
| 2931 | "}\n" |
| 2932 | "inline __device__ short2 __ldg(const short2 *ptr) {\n" |
| 2933 | " typedef short s2 __attribute__((ext_vector_type(2)));\n" |
| 2934 | " s2 rv = __nvvm_ldg_s2(reinterpret_cast<const s2 *>(ptr));\n" |
| 2935 | " short2 ret;\n" |
| 2936 | " ret.x = rv[0];\n" |
| 2937 | " ret.y = rv[1];\n" |
| 2938 | " return ret;\n" |
| 2939 | "}\n" |
| 2940 | "inline __device__ short4 __ldg(const short4 *ptr) {\n" |
| 2941 | " typedef short s4 __attribute__((ext_vector_type(4)));\n" |
| 2942 | " s4 rv = __nvvm_ldg_s4(reinterpret_cast<const s4 *>(ptr));\n" |
| 2943 | " short4 ret;\n" |
| 2944 | " ret.x = rv[0];\n" |
| 2945 | " ret.y = rv[1];\n" |
| 2946 | " ret.z = rv[2];\n" |
| 2947 | " ret.w = rv[3];\n" |
| 2948 | " return ret;\n" |
| 2949 | "}\n" |
| 2950 | "inline __device__ int2 __ldg(const int2 *ptr) {\n" |
| 2951 | " typedef int i2 __attribute__((ext_vector_type(2)));\n" |
| 2952 | " i2 rv = __nvvm_ldg_i2(reinterpret_cast<const i2 *>(ptr));\n" |
| 2953 | " int2 ret;\n" |
| 2954 | " ret.x = rv[0];\n" |
| 2955 | " ret.y = rv[1];\n" |
| 2956 | " return ret;\n" |
| 2957 | "}\n" |
| 2958 | "inline __device__ int4 __ldg(const int4 *ptr) {\n" |
| 2959 | " typedef int i4 __attribute__((ext_vector_type(4)));\n" |
| 2960 | " i4 rv = __nvvm_ldg_i4(reinterpret_cast<const i4 *>(ptr));\n" |
| 2961 | " int4 ret;\n" |
| 2962 | " ret.x = rv[0];\n" |
| 2963 | " ret.y = rv[1];\n" |
| 2964 | " ret.z = rv[2];\n" |
| 2965 | " ret.w = rv[3];\n" |
| 2966 | " return ret;\n" |
| 2967 | "}\n" |
| 2968 | "inline __device__ longlong2 __ldg(const longlong2 *ptr) {\n" |
| 2969 | " typedef long long ll2 __attribute__((ext_vector_type(2)));\n" |
| 2970 | " ll2 rv = __nvvm_ldg_ll2(reinterpret_cast<const ll2 *>(ptr));\n" |
| 2971 | " longlong2 ret;\n" |
| 2972 | " ret.x = rv[0];\n" |
| 2973 | " ret.y = rv[1];\n" |
| 2974 | " return ret;\n" |
| 2975 | "}\n" |
| 2976 | "\n" |
| 2977 | "inline __device__ uchar2 __ldg(const uchar2 *ptr) {\n" |
| 2978 | " typedef unsigned char uc2 __attribute__((ext_vector_type(2)));\n" |
| 2979 | " uc2 rv = __nvvm_ldg_uc2(reinterpret_cast<const uc2 *>(ptr));\n" |
| 2980 | " uchar2 ret;\n" |
| 2981 | " ret.x = rv[0];\n" |
| 2982 | " ret.y = rv[1];\n" |
| 2983 | " return ret;\n" |
| 2984 | "}\n" |
| 2985 | "inline __device__ uchar4 __ldg(const uchar4 *ptr) {\n" |
| 2986 | " typedef unsigned char uc4 __attribute__((ext_vector_type(4)));\n" |
| 2987 | " uc4 rv = __nvvm_ldg_uc4(reinterpret_cast<const uc4 *>(ptr));\n" |
| 2988 | " uchar4 ret;\n" |
| 2989 | " ret.x = rv[0];\n" |
| 2990 | " ret.y = rv[1];\n" |
| 2991 | " ret.z = rv[2];\n" |
| 2992 | " ret.w = rv[3];\n" |
| 2993 | " return ret;\n" |
| 2994 | "}\n" |
| 2995 | "inline __device__ ushort2 __ldg(const ushort2 *ptr) {\n" |
| 2996 | " typedef unsigned short us2 __attribute__((ext_vector_type(2)));\n" |
| 2997 | " us2 rv = __nvvm_ldg_us2(reinterpret_cast<const us2 *>(ptr));\n" |
| 2998 | " ushort2 ret;\n" |
| 2999 | " ret.x = rv[0];\n" |
| 3000 | " ret.y = rv[1];\n" |
| 3001 | " return ret;\n" |
| 3002 | "}\n" |
| 3003 | "inline __device__ ushort4 __ldg(const ushort4 *ptr) {\n" |
| 3004 | " typedef unsigned short us4 __attribute__((ext_vector_type(4)));\n" |
| 3005 | " us4 rv = __nvvm_ldg_us4(reinterpret_cast<const us4 *>(ptr));\n" |
| 3006 | " ushort4 ret;\n" |
| 3007 | " ret.x = rv[0];\n" |
| 3008 | " ret.y = rv[1];\n" |
| 3009 | " ret.z = rv[2];\n" |
| 3010 | " ret.w = rv[3];\n" |
| 3011 | " return ret;\n" |
| 3012 | "}\n" |
| 3013 | "inline __device__ uint2 __ldg(const uint2 *ptr) {\n" |
| 3014 | " typedef unsigned int ui2 __attribute__((ext_vector_type(2)));\n" |
| 3015 | " ui2 rv = __nvvm_ldg_ui2(reinterpret_cast<const ui2 *>(ptr));\n" |
| 3016 | " uint2 ret;\n" |
| 3017 | " ret.x = rv[0];\n" |
| 3018 | " ret.y = rv[1];\n" |
| 3019 | " return ret;\n" |
| 3020 | "}\n" |
| 3021 | "inline __device__ uint4 __ldg(const uint4 *ptr) {\n" |
| 3022 | " typedef unsigned int ui4 __attribute__((ext_vector_type(4)));\n" |
| 3023 | " ui4 rv = __nvvm_ldg_ui4(reinterpret_cast<const ui4 *>(ptr));\n" |
| 3024 | " uint4 ret;\n" |
| 3025 | " ret.x = rv[0];\n" |
| 3026 | " ret.y = rv[1];\n" |
| 3027 | " ret.z = rv[2];\n" |
| 3028 | " ret.w = rv[3];\n" |
| 3029 | " return ret;\n" |
| 3030 | "}\n" |
| 3031 | "inline __device__ ulonglong2 __ldg(const ulonglong2 *ptr) {\n" |
| 3032 | " typedef unsigned long long ull2 __attribute__((ext_vector_type(2)));\n" |
| 3033 | " ull2 rv = __nvvm_ldg_ull2(reinterpret_cast<const ull2 *>(ptr));\n" |
| 3034 | " ulonglong2 ret;\n" |
| 3035 | " ret.x = rv[0];\n" |
| 3036 | " ret.y = rv[1];\n" |
| 3037 | " return ret;\n" |
| 3038 | "}\n" |
| 3039 | "\n" |
| 3040 | "inline __device__ float2 __ldg(const float2 *ptr) {\n" |
| 3041 | " typedef float f2 __attribute__((ext_vector_type(2)));\n" |
| 3042 | " f2 rv = __nvvm_ldg_f2(reinterpret_cast<const f2 *>(ptr));\n" |
| 3043 | " float2 ret;\n" |
| 3044 | " ret.x = rv[0];\n" |
| 3045 | " ret.y = rv[1];\n" |
| 3046 | " return ret;\n" |
| 3047 | "}\n" |
| 3048 | "inline __device__ float4 __ldg(const float4 *ptr) {\n" |
| 3049 | " typedef float f4 __attribute__((ext_vector_type(4)));\n" |
| 3050 | " f4 rv = __nvvm_ldg_f4(reinterpret_cast<const f4 *>(ptr));\n" |
| 3051 | " float4 ret;\n" |
| 3052 | " ret.x = rv[0];\n" |
| 3053 | " ret.y = rv[1];\n" |
| 3054 | " ret.z = rv[2];\n" |
| 3055 | " ret.w = rv[3];\n" |
| 3056 | " return ret;\n" |
| 3057 | "}\n" |
| 3058 | "inline __device__ double2 __ldg(const double2 *ptr) {\n" |
| 3059 | " typedef double d2 __attribute__((ext_vector_type(2)));\n" |
| 3060 | " d2 rv = __nvvm_ldg_d2(reinterpret_cast<const d2 *>(ptr));\n" |
| 3061 | " double2 ret;\n" |
| 3062 | " ret.x = rv[0];\n" |
| 3063 | " ret.y = rv[1];\n" |
| 3064 | " return ret;\n" |
| 3065 | "}\n" |
| 3066 | "\n" |
| 3067 | "// TODO: Implement these as intrinsics, so the backend can work its magic on\n" |
| 3068 | "// these. Alternatively, we could implement these as plain C and try to get\n" |
| 3069 | "// llvm to recognize the relevant patterns.\n" |
| 3070 | "inline __device__ unsigned __funnelshift_l(unsigned low32, unsigned high32,\n" |
| 3071 | " unsigned shiftWidth) {\n" |
| 3072 | " unsigned result;\n" |
| 3073 | " asm(\"shf.l.wrap.b32 %0, %1, %2, %3;\"\n" |
| 3074 | " : \"=r\"(result)\n" |
| 3075 | " : \"r\"(low32), \"r\"(high32), \"r\"(shiftWidth));\n" |
| 3076 | " return result;\n" |
| 3077 | "}\n" |
| 3078 | "inline __device__ unsigned __funnelshift_lc(unsigned low32, unsigned high32,\n" |
| 3079 | " unsigned shiftWidth) {\n" |
| 3080 | " unsigned result;\n" |
| 3081 | " asm(\"shf.l.clamp.b32 %0, %1, %2, %3;\"\n" |
| 3082 | " : \"=r\"(result)\n" |
| 3083 | " : \"r\"(low32), \"r\"(high32), \"r\"(shiftWidth));\n" |
| 3084 | " return result;\n" |
| 3085 | "}\n" |
| 3086 | "inline __device__ unsigned __funnelshift_r(unsigned low32, unsigned high32,\n" |
| 3087 | " unsigned shiftWidth) {\n" |
| 3088 | " unsigned result;\n" |
| 3089 | " asm(\"shf.r.wrap.b32 %0, %1, %2, %3;\"\n" |
| 3090 | " : \"=r\"(result)\n" |
| 3091 | " : \"r\"(low32), \"r\"(high32), \"r\"(shiftWidth));\n" |
| 3092 | " return result;\n" |
| 3093 | "}\n" |
| 3094 | "inline __device__ unsigned __funnelshift_rc(unsigned low32, unsigned high32,\n" |
| 3095 | " unsigned shiftWidth) {\n" |
| 3096 | " unsigned ret;\n" |
| 3097 | " asm(\"shf.r.clamp.b32 %0, %1, %2, %3;\"\n" |
| 3098 | " : \"=r\"(ret)\n" |
| 3099 | " : \"r\"(low32), \"r\"(high32), \"r\"(shiftWidth));\n" |
| 3100 | " return ret;\n" |
| 3101 | "}\n" |
| 3102 | "\n" |
| 3103 | "#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320\n" |
| 3104 | "\n" |
| 3105 | "#endif // defined(__CLANG_CUDA_INTRINSICS_H__)\n" |
| 3106 | "" } , |
| 3107 | { "/builtins/__clang_cuda_libdevice_declares.h" , "/*===-- __clang_cuda_libdevice_declares.h - decls for libdevice functions --===\n" |
| 3108 | " *\n" |
| 3109 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 3110 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 3111 | " * in the Software without restriction, including without limitation the rights\n" |
| 3112 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 3113 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 3114 | " * furnished to do so, subject to the following conditions:\n" |
| 3115 | " *\n" |
| 3116 | " * The above copyright notice and this permission notice shall be included in\n" |
| 3117 | " * all copies or substantial portions of the Software.\n" |
| 3118 | " *\n" |
| 3119 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 3120 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 3121 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 3122 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 3123 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 3124 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 3125 | " * THE SOFTWARE.\n" |
| 3126 | " *\n" |
| 3127 | " *===-----------------------------------------------------------------------===\n" |
| 3128 | " */\n" |
| 3129 | "\n" |
| 3130 | "#ifndef __CLANG_CUDA_LIBDEVICE_DECLARES_H__\n" |
| 3131 | "#define __CLANG_CUDA_LIBDEVICE_DECLARES_H__\n" |
| 3132 | "\n" |
| 3133 | "extern \"C\" {\n" |
| 3134 | "\n" |
| 3135 | "__device__ int __nv_abs(int __a);\n" |
| 3136 | "__device__ double __nv_acos(double __a);\n" |
| 3137 | "__device__ float __nv_acosf(float __a);\n" |
| 3138 | "__device__ double __nv_acosh(double __a);\n" |
| 3139 | "__device__ float __nv_acoshf(float __a);\n" |
| 3140 | "__device__ double __nv_asin(double __a);\n" |
| 3141 | "__device__ float __nv_asinf(float __a);\n" |
| 3142 | "__device__ double __nv_asinh(double __a);\n" |
| 3143 | "__device__ float __nv_asinhf(float __a);\n" |
| 3144 | "__device__ double __nv_atan2(double __a, double __b);\n" |
| 3145 | "__device__ float __nv_atan2f(float __a, float __b);\n" |
| 3146 | "__device__ double __nv_atan(double __a);\n" |
| 3147 | "__device__ float __nv_atanf(float __a);\n" |
| 3148 | "__device__ double __nv_atanh(double __a);\n" |
| 3149 | "__device__ float __nv_atanhf(float __a);\n" |
| 3150 | "__device__ int __nv_brev(int __a);\n" |
| 3151 | "__device__ long long __nv_brevll(long long __a);\n" |
| 3152 | "__device__ int __nv_byte_perm(int __a, int __b, int __c);\n" |
| 3153 | "__device__ double __nv_cbrt(double __a);\n" |
| 3154 | "__device__ float __nv_cbrtf(float __a);\n" |
| 3155 | "__device__ double __nv_ceil(double __a);\n" |
| 3156 | "__device__ float __nv_ceilf(float __a);\n" |
| 3157 | "__device__ int __nv_clz(int __a);\n" |
| 3158 | "__device__ int __nv_clzll(long long __a);\n" |
| 3159 | "__device__ double __nv_copysign(double __a, double __b);\n" |
| 3160 | "__device__ float __nv_copysignf(float __a, float __b);\n" |
| 3161 | "__device__ double __nv_cos(double __a);\n" |
| 3162 | "__device__ float __nv_cosf(float __a);\n" |
| 3163 | "__device__ double __nv_cosh(double __a);\n" |
| 3164 | "__device__ float __nv_coshf(float __a);\n" |
| 3165 | "__device__ double __nv_cospi(double __a);\n" |
| 3166 | "__device__ float __nv_cospif(float __a);\n" |
| 3167 | "__device__ double __nv_cyl_bessel_i0(double __a);\n" |
| 3168 | "__device__ float __nv_cyl_bessel_i0f(float __a);\n" |
| 3169 | "__device__ double __nv_cyl_bessel_i1(double __a);\n" |
| 3170 | "__device__ float __nv_cyl_bessel_i1f(float __a);\n" |
| 3171 | "__device__ double __nv_dadd_rd(double __a, double __b);\n" |
| 3172 | "__device__ double __nv_dadd_rn(double __a, double __b);\n" |
| 3173 | "__device__ double __nv_dadd_ru(double __a, double __b);\n" |
| 3174 | "__device__ double __nv_dadd_rz(double __a, double __b);\n" |
| 3175 | "__device__ double __nv_ddiv_rd(double __a, double __b);\n" |
| 3176 | "__device__ double __nv_ddiv_rn(double __a, double __b);\n" |
| 3177 | "__device__ double __nv_ddiv_ru(double __a, double __b);\n" |
| 3178 | "__device__ double __nv_ddiv_rz(double __a, double __b);\n" |
| 3179 | "__device__ double __nv_dmul_rd(double __a, double __b);\n" |
| 3180 | "__device__ double __nv_dmul_rn(double __a, double __b);\n" |
| 3181 | "__device__ double __nv_dmul_ru(double __a, double __b);\n" |
| 3182 | "__device__ double __nv_dmul_rz(double __a, double __b);\n" |
| 3183 | "__device__ float __nv_double2float_rd(double __a);\n" |
| 3184 | "__device__ float __nv_double2float_rn(double __a);\n" |
| 3185 | "__device__ float __nv_double2float_ru(double __a);\n" |
| 3186 | "__device__ float __nv_double2float_rz(double __a);\n" |
| 3187 | "__device__ int __nv_double2hiint(double __a);\n" |
| 3188 | "__device__ int __nv_double2int_rd(double __a);\n" |
| 3189 | "__device__ int __nv_double2int_rn(double __a);\n" |
| 3190 | "__device__ int __nv_double2int_ru(double __a);\n" |
| 3191 | "__device__ int __nv_double2int_rz(double __a);\n" |
| 3192 | "__device__ long long __nv_double2ll_rd(double __a);\n" |
| 3193 | "__device__ long long __nv_double2ll_rn(double __a);\n" |
| 3194 | "__device__ long long __nv_double2ll_ru(double __a);\n" |
| 3195 | "__device__ long long __nv_double2ll_rz(double __a);\n" |
| 3196 | "__device__ int __nv_double2loint(double __a);\n" |
| 3197 | "__device__ unsigned int __nv_double2uint_rd(double __a);\n" |
| 3198 | "__device__ unsigned int __nv_double2uint_rn(double __a);\n" |
| 3199 | "__device__ unsigned int __nv_double2uint_ru(double __a);\n" |
| 3200 | "__device__ unsigned int __nv_double2uint_rz(double __a);\n" |
| 3201 | "__device__ unsigned long long __nv_double2ull_rd(double __a);\n" |
| 3202 | "__device__ unsigned long long __nv_double2ull_rn(double __a);\n" |
| 3203 | "__device__ unsigned long long __nv_double2ull_ru(double __a);\n" |
| 3204 | "__device__ unsigned long long __nv_double2ull_rz(double __a);\n" |
| 3205 | "__device__ unsigned long long __nv_double_as_longlong(double __a);\n" |
| 3206 | "__device__ double __nv_drcp_rd(double __a);\n" |
| 3207 | "__device__ double __nv_drcp_rn(double __a);\n" |
| 3208 | "__device__ double __nv_drcp_ru(double __a);\n" |
| 3209 | "__device__ double __nv_drcp_rz(double __a);\n" |
| 3210 | "__device__ double __nv_dsqrt_rd(double __a);\n" |
| 3211 | "__device__ double __nv_dsqrt_rn(double __a);\n" |
| 3212 | "__device__ double __nv_dsqrt_ru(double __a);\n" |
| 3213 | "__device__ double __nv_dsqrt_rz(double __a);\n" |
| 3214 | "__device__ double __nv_dsub_rd(double __a, double __b);\n" |
| 3215 | "__device__ double __nv_dsub_rn(double __a, double __b);\n" |
| 3216 | "__device__ double __nv_dsub_ru(double __a, double __b);\n" |
| 3217 | "__device__ double __nv_dsub_rz(double __a, double __b);\n" |
| 3218 | "__device__ double __nv_erfc(double __a);\n" |
| 3219 | "__device__ float __nv_erfcf(float __a);\n" |
| 3220 | "__device__ double __nv_erfcinv(double __a);\n" |
| 3221 | "__device__ float __nv_erfcinvf(float __a);\n" |
| 3222 | "__device__ double __nv_erfcx(double __a);\n" |
| 3223 | "__device__ float __nv_erfcxf(float __a);\n" |
| 3224 | "__device__ double __nv_erf(double __a);\n" |
| 3225 | "__device__ float __nv_erff(float __a);\n" |
| 3226 | "__device__ double __nv_erfinv(double __a);\n" |
| 3227 | "__device__ float __nv_erfinvf(float __a);\n" |
| 3228 | "__device__ double __nv_exp10(double __a);\n" |
| 3229 | "__device__ float __nv_exp10f(float __a);\n" |
| 3230 | "__device__ double __nv_exp2(double __a);\n" |
| 3231 | "__device__ float __nv_exp2f(float __a);\n" |
| 3232 | "__device__ double __nv_exp(double __a);\n" |
| 3233 | "__device__ float __nv_expf(float __a);\n" |
| 3234 | "__device__ double __nv_expm1(double __a);\n" |
| 3235 | "__device__ float __nv_expm1f(float __a);\n" |
| 3236 | "__device__ double __nv_fabs(double __a);\n" |
| 3237 | "__device__ float __nv_fabsf(float __a);\n" |
| 3238 | "__device__ float __nv_fadd_rd(float __a, float __b);\n" |
| 3239 | "__device__ float __nv_fadd_rn(float __a, float __b);\n" |
| 3240 | "__device__ float __nv_fadd_ru(float __a, float __b);\n" |
| 3241 | "__device__ float __nv_fadd_rz(float __a, float __b);\n" |
| 3242 | "__device__ float __nv_fast_cosf(float __a);\n" |
| 3243 | "__device__ float __nv_fast_exp10f(float __a);\n" |
| 3244 | "__device__ float __nv_fast_expf(float __a);\n" |
| 3245 | "__device__ float __nv_fast_fdividef(float __a, float __b);\n" |
| 3246 | "__device__ float __nv_fast_log10f(float __a);\n" |
| 3247 | "__device__ float __nv_fast_log2f(float __a);\n" |
| 3248 | "__device__ float __nv_fast_logf(float __a);\n" |
| 3249 | "__device__ float __nv_fast_powf(float __a, float __b);\n" |
| 3250 | "__device__ void __nv_fast_sincosf(float __a, float *__sptr, float *__cptr);\n" |
| 3251 | "__device__ float __nv_fast_sinf(float __a);\n" |
| 3252 | "__device__ float __nv_fast_tanf(float __a);\n" |
| 3253 | "__device__ double __nv_fdim(double __a, double __b);\n" |
| 3254 | "__device__ float __nv_fdimf(float __a, float __b);\n" |
| 3255 | "__device__ float __nv_fdiv_rd(float __a, float __b);\n" |
| 3256 | "__device__ float __nv_fdiv_rn(float __a, float __b);\n" |
| 3257 | "__device__ float __nv_fdiv_ru(float __a, float __b);\n" |
| 3258 | "__device__ float __nv_fdiv_rz(float __a, float __b);\n" |
| 3259 | "__device__ int __nv_ffs(int __a);\n" |
| 3260 | "__device__ int __nv_ffsll(long long __a);\n" |
| 3261 | "__device__ int __nv_finitef(float __a);\n" |
| 3262 | "__device__ unsigned short __nv_float2half_rn(float __a);\n" |
| 3263 | "__device__ int __nv_float2int_rd(float __a);\n" |
| 3264 | "__device__ int __nv_float2int_rn(float __a);\n" |
| 3265 | "__device__ int __nv_float2int_ru(float __a);\n" |
| 3266 | "__device__ int __nv_float2int_rz(float __a);\n" |
| 3267 | "__device__ long long __nv_float2ll_rd(float __a);\n" |
| 3268 | "__device__ long long __nv_float2ll_rn(float __a);\n" |
| 3269 | "__device__ long long __nv_float2ll_ru(float __a);\n" |
| 3270 | "__device__ long long __nv_float2ll_rz(float __a);\n" |
| 3271 | "__device__ unsigned int __nv_float2uint_rd(float __a);\n" |
| 3272 | "__device__ unsigned int __nv_float2uint_rn(float __a);\n" |
| 3273 | "__device__ unsigned int __nv_float2uint_ru(float __a);\n" |
| 3274 | "__device__ unsigned int __nv_float2uint_rz(float __a);\n" |
| 3275 | "__device__ unsigned long long __nv_float2ull_rd(float __a);\n" |
| 3276 | "__device__ unsigned long long __nv_float2ull_rn(float __a);\n" |
| 3277 | "__device__ unsigned long long __nv_float2ull_ru(float __a);\n" |
| 3278 | "__device__ unsigned long long __nv_float2ull_rz(float __a);\n" |
| 3279 | "__device__ int __nv_float_as_int(float __a);\n" |
| 3280 | "__device__ unsigned int __nv_float_as_uint(float __a);\n" |
| 3281 | "__device__ double __nv_floor(double __a);\n" |
| 3282 | "__device__ float __nv_floorf(float __a);\n" |
| 3283 | "__device__ double __nv_fma(double __a, double __b, double __c);\n" |
| 3284 | "__device__ float __nv_fmaf(float __a, float __b, float __c);\n" |
| 3285 | "__device__ float __nv_fmaf_ieee_rd(float __a, float __b, float __c);\n" |
| 3286 | "__device__ float __nv_fmaf_ieee_rn(float __a, float __b, float __c);\n" |
| 3287 | "__device__ float __nv_fmaf_ieee_ru(float __a, float __b, float __c);\n" |
| 3288 | "__device__ float __nv_fmaf_ieee_rz(float __a, float __b, float __c);\n" |
| 3289 | "__device__ float __nv_fmaf_rd(float __a, float __b, float __c);\n" |
| 3290 | "__device__ float __nv_fmaf_rn(float __a, float __b, float __c);\n" |
| 3291 | "__device__ float __nv_fmaf_ru(float __a, float __b, float __c);\n" |
| 3292 | "__device__ float __nv_fmaf_rz(float __a, float __b, float __c);\n" |
| 3293 | "__device__ double __nv_fma_rd(double __a, double __b, double __c);\n" |
| 3294 | "__device__ double __nv_fma_rn(double __a, double __b, double __c);\n" |
| 3295 | "__device__ double __nv_fma_ru(double __a, double __b, double __c);\n" |
| 3296 | "__device__ double __nv_fma_rz(double __a, double __b, double __c);\n" |
| 3297 | "__device__ double __nv_fmax(double __a, double __b);\n" |
| 3298 | "__device__ float __nv_fmaxf(float __a, float __b);\n" |
| 3299 | "__device__ double __nv_fmin(double __a, double __b);\n" |
| 3300 | "__device__ float __nv_fminf(float __a, float __b);\n" |
| 3301 | "__device__ double __nv_fmod(double __a, double __b);\n" |
| 3302 | "__device__ float __nv_fmodf(float __a, float __b);\n" |
| 3303 | "__device__ float __nv_fmul_rd(float __a, float __b);\n" |
| 3304 | "__device__ float __nv_fmul_rn(float __a, float __b);\n" |
| 3305 | "__device__ float __nv_fmul_ru(float __a, float __b);\n" |
| 3306 | "__device__ float __nv_fmul_rz(float __a, float __b);\n" |
| 3307 | "__device__ float __nv_frcp_rd(float __a);\n" |
| 3308 | "__device__ float __nv_frcp_rn(float __a);\n" |
| 3309 | "__device__ float __nv_frcp_ru(float __a);\n" |
| 3310 | "__device__ float __nv_frcp_rz(float __a);\n" |
| 3311 | "__device__ double __nv_frexp(double __a, int *__b);\n" |
| 3312 | "__device__ float __nv_frexpf(float __a, int *__b);\n" |
| 3313 | "__device__ float __nv_frsqrt_rn(float __a);\n" |
| 3314 | "__device__ float __nv_fsqrt_rd(float __a);\n" |
| 3315 | "__device__ float __nv_fsqrt_rn(float __a);\n" |
| 3316 | "__device__ float __nv_fsqrt_ru(float __a);\n" |
| 3317 | "__device__ float __nv_fsqrt_rz(float __a);\n" |
| 3318 | "__device__ float __nv_fsub_rd(float __a, float __b);\n" |
| 3319 | "__device__ float __nv_fsub_rn(float __a, float __b);\n" |
| 3320 | "__device__ float __nv_fsub_ru(float __a, float __b);\n" |
| 3321 | "__device__ float __nv_fsub_rz(float __a, float __b);\n" |
| 3322 | "__device__ int __nv_hadd(int __a, int __b);\n" |
| 3323 | "__device__ float __nv_half2float(unsigned short __h);\n" |
| 3324 | "__device__ double __nv_hiloint2double(int __a, int __b);\n" |
| 3325 | "__device__ double __nv_hypot(double __a, double __b);\n" |
| 3326 | "__device__ float __nv_hypotf(float __a, float __b);\n" |
| 3327 | "__device__ int __nv_ilogb(double __a);\n" |
| 3328 | "__device__ int __nv_ilogbf(float __a);\n" |
| 3329 | "__device__ double __nv_int2double_rn(int __a);\n" |
| 3330 | "__device__ float __nv_int2float_rd(int __a);\n" |
| 3331 | "__device__ float __nv_int2float_rn(int __a);\n" |
| 3332 | "__device__ float __nv_int2float_ru(int __a);\n" |
| 3333 | "__device__ float __nv_int2float_rz(int __a);\n" |
| 3334 | "__device__ float __nv_int_as_float(int __a);\n" |
| 3335 | "__device__ int __nv_isfinited(double __a);\n" |
| 3336 | "__device__ int __nv_isinfd(double __a);\n" |
| 3337 | "__device__ int __nv_isinff(float __a);\n" |
| 3338 | "__device__ int __nv_isnand(double __a);\n" |
| 3339 | "__device__ int __nv_isnanf(float __a);\n" |
| 3340 | "__device__ double __nv_j0(double __a);\n" |
| 3341 | "__device__ float __nv_j0f(float __a);\n" |
| 3342 | "__device__ double __nv_j1(double __a);\n" |
| 3343 | "__device__ float __nv_j1f(float __a);\n" |
| 3344 | "__device__ float __nv_jnf(int __a, float __b);\n" |
| 3345 | "__device__ double __nv_jn(int __a, double __b);\n" |
| 3346 | "__device__ double __nv_ldexp(double __a, int __b);\n" |
| 3347 | "__device__ float __nv_ldexpf(float __a, int __b);\n" |
| 3348 | "__device__ double __nv_lgamma(double __a);\n" |
| 3349 | "__device__ float __nv_lgammaf(float __a);\n" |
| 3350 | "__device__ double __nv_ll2double_rd(long long __a);\n" |
| 3351 | "__device__ double __nv_ll2double_rn(long long __a);\n" |
| 3352 | "__device__ double __nv_ll2double_ru(long long __a);\n" |
| 3353 | "__device__ double __nv_ll2double_rz(long long __a);\n" |
| 3354 | "__device__ float __nv_ll2float_rd(long long __a);\n" |
| 3355 | "__device__ float __nv_ll2float_rn(long long __a);\n" |
| 3356 | "__device__ float __nv_ll2float_ru(long long __a);\n" |
| 3357 | "__device__ float __nv_ll2float_rz(long long __a);\n" |
| 3358 | "__device__ long long __nv_llabs(long long __a);\n" |
| 3359 | "__device__ long long __nv_llmax(long long __a, long long __b);\n" |
| 3360 | "__device__ long long __nv_llmin(long long __a, long long __b);\n" |
| 3361 | "__device__ long long __nv_llrint(double __a);\n" |
| 3362 | "__device__ long long __nv_llrintf(float __a);\n" |
| 3363 | "__device__ long long __nv_llround(double __a);\n" |
| 3364 | "__device__ long long __nv_llroundf(float __a);\n" |
| 3365 | "__device__ double __nv_log10(double __a);\n" |
| 3366 | "__device__ float __nv_log10f(float __a);\n" |
| 3367 | "__device__ double __nv_log1p(double __a);\n" |
| 3368 | "__device__ float __nv_log1pf(float __a);\n" |
| 3369 | "__device__ double __nv_log2(double __a);\n" |
| 3370 | "__device__ float __nv_log2f(float __a);\n" |
| 3371 | "__device__ double __nv_logb(double __a);\n" |
| 3372 | "__device__ float __nv_logbf(float __a);\n" |
| 3373 | "__device__ double __nv_log(double __a);\n" |
| 3374 | "__device__ float __nv_logf(float __a);\n" |
| 3375 | "__device__ double __nv_longlong_as_double(long long __a);\n" |
| 3376 | "__device__ int __nv_max(int __a, int __b);\n" |
| 3377 | "__device__ int __nv_min(int __a, int __b);\n" |
| 3378 | "__device__ double __nv_modf(double __a, double *__b);\n" |
| 3379 | "__device__ float __nv_modff(float __a, float *__b);\n" |
| 3380 | "__device__ int __nv_mul24(int __a, int __b);\n" |
| 3381 | "__device__ long long __nv_mul64hi(long long __a, long long __b);\n" |
| 3382 | "__device__ int __nv_mulhi(int __a, int __b);\n" |
| 3383 | "__device__ double __nv_nan(const signed char *__a);\n" |
| 3384 | "__device__ float __nv_nanf(const signed char *__a);\n" |
| 3385 | "__device__ double __nv_nearbyint(double __a);\n" |
| 3386 | "__device__ float __nv_nearbyintf(float __a);\n" |
| 3387 | "__device__ double __nv_nextafter(double __a, double __b);\n" |
| 3388 | "__device__ float __nv_nextafterf(float __a, float __b);\n" |
| 3389 | "__device__ double __nv_norm3d(double __a, double __b, double __c);\n" |
| 3390 | "__device__ float __nv_norm3df(float __a, float __b, float __c);\n" |
| 3391 | "__device__ double __nv_norm4d(double __a, double __b, double __c, double __d);\n" |
| 3392 | "__device__ float __nv_norm4df(float __a, float __b, float __c, float __d);\n" |
| 3393 | "__device__ double __nv_normcdf(double __a);\n" |
| 3394 | "__device__ float __nv_normcdff(float __a);\n" |
| 3395 | "__device__ double __nv_normcdfinv(double __a);\n" |
| 3396 | "__device__ float __nv_normcdfinvf(float __a);\n" |
| 3397 | "__device__ float __nv_normf(int __a, const float *__b);\n" |
| 3398 | "__device__ double __nv_norm(int __a, const double *__b);\n" |
| 3399 | "__device__ int __nv_popc(int __a);\n" |
| 3400 | "__device__ int __nv_popcll(long long __a);\n" |
| 3401 | "__device__ double __nv_pow(double __a, double __b);\n" |
| 3402 | "__device__ float __nv_powf(float __a, float __b);\n" |
| 3403 | "__device__ double __nv_powi(double __a, int __b);\n" |
| 3404 | "__device__ float __nv_powif(float __a, int __b);\n" |
| 3405 | "__device__ double __nv_rcbrt(double __a);\n" |
| 3406 | "__device__ float __nv_rcbrtf(float __a);\n" |
| 3407 | "__device__ double __nv_rcp64h(double __a);\n" |
| 3408 | "__device__ double __nv_remainder(double __a, double __b);\n" |
| 3409 | "__device__ float __nv_remainderf(float __a, float __b);\n" |
| 3410 | "__device__ double __nv_remquo(double __a, double __b, int *__c);\n" |
| 3411 | "__device__ float __nv_remquof(float __a, float __b, int *__c);\n" |
| 3412 | "__device__ int __nv_rhadd(int __a, int __b);\n" |
| 3413 | "__device__ double __nv_rhypot(double __a, double __b);\n" |
| 3414 | "__device__ float __nv_rhypotf(float __a, float __b);\n" |
| 3415 | "__device__ double __nv_rint(double __a);\n" |
| 3416 | "__device__ float __nv_rintf(float __a);\n" |
| 3417 | "__device__ double __nv_rnorm3d(double __a, double __b, double __c);\n" |
| 3418 | "__device__ float __nv_rnorm3df(float __a, float __b, float __c);\n" |
| 3419 | "__device__ double __nv_rnorm4d(double __a, double __b, double __c, double __d);\n" |
| 3420 | "__device__ float __nv_rnorm4df(float __a, float __b, float __c, float __d);\n" |
| 3421 | "__device__ float __nv_rnormf(int __a, const float *__b);\n" |
| 3422 | "__device__ double __nv_rnorm(int __a, const double *__b);\n" |
| 3423 | "__device__ double __nv_round(double __a);\n" |
| 3424 | "__device__ float __nv_roundf(float __a);\n" |
| 3425 | "__device__ double __nv_rsqrt(double __a);\n" |
| 3426 | "__device__ float __nv_rsqrtf(float __a);\n" |
| 3427 | "__device__ int __nv_sad(int __a, int __b, int __c);\n" |
| 3428 | "__device__ float __nv_saturatef(float __a);\n" |
| 3429 | "__device__ double __nv_scalbn(double __a, int __b);\n" |
| 3430 | "__device__ float __nv_scalbnf(float __a, int __b);\n" |
| 3431 | "__device__ int __nv_signbitd(double __a);\n" |
| 3432 | "__device__ int __nv_signbitf(float __a);\n" |
| 3433 | "__device__ void __nv_sincos(double __a, double *__b, double *__c);\n" |
| 3434 | "__device__ void __nv_sincosf(float __a, float *__b, float *__c);\n" |
| 3435 | "__device__ void __nv_sincospi(double __a, double *__b, double *__c);\n" |
| 3436 | "__device__ void __nv_sincospif(float __a, float *__b, float *__c);\n" |
| 3437 | "__device__ double __nv_sin(double __a);\n" |
| 3438 | "__device__ float __nv_sinf(float __a);\n" |
| 3439 | "__device__ double __nv_sinh(double __a);\n" |
| 3440 | "__device__ float __nv_sinhf(float __a);\n" |
| 3441 | "__device__ double __nv_sinpi(double __a);\n" |
| 3442 | "__device__ float __nv_sinpif(float __a);\n" |
| 3443 | "__device__ double __nv_sqrt(double __a);\n" |
| 3444 | "__device__ float __nv_sqrtf(float __a);\n" |
| 3445 | "__device__ double __nv_tan(double __a);\n" |
| 3446 | "__device__ float __nv_tanf(float __a);\n" |
| 3447 | "__device__ double __nv_tanh(double __a);\n" |
| 3448 | "__device__ float __nv_tanhf(float __a);\n" |
| 3449 | "__device__ double __nv_tgamma(double __a);\n" |
| 3450 | "__device__ float __nv_tgammaf(float __a);\n" |
| 3451 | "__device__ double __nv_trunc(double __a);\n" |
| 3452 | "__device__ float __nv_truncf(float __a);\n" |
| 3453 | "__device__ int __nv_uhadd(unsigned int __a, unsigned int __b);\n" |
| 3454 | "__device__ double __nv_uint2double_rn(unsigned int __i);\n" |
| 3455 | "__device__ float __nv_uint2float_rd(unsigned int __a);\n" |
| 3456 | "__device__ float __nv_uint2float_rn(unsigned int __a);\n" |
| 3457 | "__device__ float __nv_uint2float_ru(unsigned int __a);\n" |
| 3458 | "__device__ float __nv_uint2float_rz(unsigned int __a);\n" |
| 3459 | "__device__ float __nv_uint_as_float(unsigned int __a);\n" |
| 3460 | "__device__ double __nv_ull2double_rd(unsigned long long __a);\n" |
| 3461 | "__device__ double __nv_ull2double_rn(unsigned long long __a);\n" |
| 3462 | "__device__ double __nv_ull2double_ru(unsigned long long __a);\n" |
| 3463 | "__device__ double __nv_ull2double_rz(unsigned long long __a);\n" |
| 3464 | "__device__ float __nv_ull2float_rd(unsigned long long __a);\n" |
| 3465 | "__device__ float __nv_ull2float_rn(unsigned long long __a);\n" |
| 3466 | "__device__ float __nv_ull2float_ru(unsigned long long __a);\n" |
| 3467 | "__device__ float __nv_ull2float_rz(unsigned long long __a);\n" |
| 3468 | "__device__ unsigned long long __nv_ullmax(unsigned long long __a,\n" |
| 3469 | " unsigned long long __b);\n" |
| 3470 | "__device__ unsigned long long __nv_ullmin(unsigned long long __a,\n" |
| 3471 | " unsigned long long __b);\n" |
| 3472 | "__device__ unsigned int __nv_umax(unsigned int __a, unsigned int __b);\n" |
| 3473 | "__device__ unsigned int __nv_umin(unsigned int __a, unsigned int __b);\n" |
| 3474 | "__device__ unsigned int __nv_umul24(unsigned int __a, unsigned int __b);\n" |
| 3475 | "__device__ unsigned long long __nv_umul64hi(unsigned long long __a,\n" |
| 3476 | " unsigned long long __b);\n" |
| 3477 | "__device__ unsigned int __nv_umulhi(unsigned int __a, unsigned int __b);\n" |
| 3478 | "__device__ unsigned int __nv_urhadd(unsigned int __a, unsigned int __b);\n" |
| 3479 | "__device__ unsigned int __nv_usad(unsigned int __a, unsigned int __b,\n" |
| 3480 | " unsigned int __c);\n" |
| 3481 | "#if CUDA_VERSION >= 9000 && CUDA_VERSION < 9020\n" |
| 3482 | "__device__ int __nv_vabs2(int __a);\n" |
| 3483 | "__device__ int __nv_vabs4(int __a);\n" |
| 3484 | "__device__ int __nv_vabsdiffs2(int __a, int __b);\n" |
| 3485 | "__device__ int __nv_vabsdiffs4(int __a, int __b);\n" |
| 3486 | "__device__ int __nv_vabsdiffu2(int __a, int __b);\n" |
| 3487 | "__device__ int __nv_vabsdiffu4(int __a, int __b);\n" |
| 3488 | "__device__ int __nv_vabsss2(int __a);\n" |
| 3489 | "__device__ int __nv_vabsss4(int __a);\n" |
| 3490 | "__device__ int __nv_vadd2(int __a, int __b);\n" |
| 3491 | "__device__ int __nv_vadd4(int __a, int __b);\n" |
| 3492 | "__device__ int __nv_vaddss2(int __a, int __b);\n" |
| 3493 | "__device__ int __nv_vaddss4(int __a, int __b);\n" |
| 3494 | "__device__ int __nv_vaddus2(int __a, int __b);\n" |
| 3495 | "__device__ int __nv_vaddus4(int __a, int __b);\n" |
| 3496 | "__device__ int __nv_vavgs2(int __a, int __b);\n" |
| 3497 | "__device__ int __nv_vavgs4(int __a, int __b);\n" |
| 3498 | "__device__ int __nv_vavgu2(int __a, int __b);\n" |
| 3499 | "__device__ int __nv_vavgu4(int __a, int __b);\n" |
| 3500 | "__device__ int __nv_vcmpeq2(int __a, int __b);\n" |
| 3501 | "__device__ int __nv_vcmpeq4(int __a, int __b);\n" |
| 3502 | "__device__ int __nv_vcmpges2(int __a, int __b);\n" |
| 3503 | "__device__ int __nv_vcmpges4(int __a, int __b);\n" |
| 3504 | "__device__ int __nv_vcmpgeu2(int __a, int __b);\n" |
| 3505 | "__device__ int __nv_vcmpgeu4(int __a, int __b);\n" |
| 3506 | "__device__ int __nv_vcmpgts2(int __a, int __b);\n" |
| 3507 | "__device__ int __nv_vcmpgts4(int __a, int __b);\n" |
| 3508 | "__device__ int __nv_vcmpgtu2(int __a, int __b);\n" |
| 3509 | "__device__ int __nv_vcmpgtu4(int __a, int __b);\n" |
| 3510 | "__device__ int __nv_vcmples2(int __a, int __b);\n" |
| 3511 | "__device__ int __nv_vcmples4(int __a, int __b);\n" |
| 3512 | "__device__ int __nv_vcmpleu2(int __a, int __b);\n" |
| 3513 | "__device__ int __nv_vcmpleu4(int __a, int __b);\n" |
| 3514 | "__device__ int __nv_vcmplts2(int __a, int __b);\n" |
| 3515 | "__device__ int __nv_vcmplts4(int __a, int __b);\n" |
| 3516 | "__device__ int __nv_vcmpltu2(int __a, int __b);\n" |
| 3517 | "__device__ int __nv_vcmpltu4(int __a, int __b);\n" |
| 3518 | "__device__ int __nv_vcmpne2(int __a, int __b);\n" |
| 3519 | "__device__ int __nv_vcmpne4(int __a, int __b);\n" |
| 3520 | "__device__ int __nv_vhaddu2(int __a, int __b);\n" |
| 3521 | "__device__ int __nv_vhaddu4(int __a, int __b);\n" |
| 3522 | "__device__ int __nv_vmaxs2(int __a, int __b);\n" |
| 3523 | "__device__ int __nv_vmaxs4(int __a, int __b);\n" |
| 3524 | "__device__ int __nv_vmaxu2(int __a, int __b);\n" |
| 3525 | "__device__ int __nv_vmaxu4(int __a, int __b);\n" |
| 3526 | "__device__ int __nv_vmins2(int __a, int __b);\n" |
| 3527 | "__device__ int __nv_vmins4(int __a, int __b);\n" |
| 3528 | "__device__ int __nv_vminu2(int __a, int __b);\n" |
| 3529 | "__device__ int __nv_vminu4(int __a, int __b);\n" |
| 3530 | "__device__ int __nv_vneg2(int __a);\n" |
| 3531 | "__device__ int __nv_vneg4(int __a);\n" |
| 3532 | "__device__ int __nv_vnegss2(int __a);\n" |
| 3533 | "__device__ int __nv_vnegss4(int __a);\n" |
| 3534 | "__device__ int __nv_vsads2(int __a, int __b);\n" |
| 3535 | "__device__ int __nv_vsads4(int __a, int __b);\n" |
| 3536 | "__device__ int __nv_vsadu2(int __a, int __b);\n" |
| 3537 | "__device__ int __nv_vsadu4(int __a, int __b);\n" |
| 3538 | "__device__ int __nv_vseteq2(int __a, int __b);\n" |
| 3539 | "__device__ int __nv_vseteq4(int __a, int __b);\n" |
| 3540 | "__device__ int __nv_vsetges2(int __a, int __b);\n" |
| 3541 | "__device__ int __nv_vsetges4(int __a, int __b);\n" |
| 3542 | "__device__ int __nv_vsetgeu2(int __a, int __b);\n" |
| 3543 | "__device__ int __nv_vsetgeu4(int __a, int __b);\n" |
| 3544 | "__device__ int __nv_vsetgts2(int __a, int __b);\n" |
| 3545 | "__device__ int __nv_vsetgts4(int __a, int __b);\n" |
| 3546 | "__device__ int __nv_vsetgtu2(int __a, int __b);\n" |
| 3547 | "__device__ int __nv_vsetgtu4(int __a, int __b);\n" |
| 3548 | "__device__ int __nv_vsetles2(int __a, int __b);\n" |
| 3549 | "__device__ int __nv_vsetles4(int __a, int __b);\n" |
| 3550 | "__device__ int __nv_vsetleu2(int __a, int __b);\n" |
| 3551 | "__device__ int __nv_vsetleu4(int __a, int __b);\n" |
| 3552 | "__device__ int __nv_vsetlts2(int __a, int __b);\n" |
| 3553 | "__device__ int __nv_vsetlts4(int __a, int __b);\n" |
| 3554 | "__device__ int __nv_vsetltu2(int __a, int __b);\n" |
| 3555 | "__device__ int __nv_vsetltu4(int __a, int __b);\n" |
| 3556 | "__device__ int __nv_vsetne2(int __a, int __b);\n" |
| 3557 | "__device__ int __nv_vsetne4(int __a, int __b);\n" |
| 3558 | "__device__ int __nv_vsub2(int __a, int __b);\n" |
| 3559 | "__device__ int __nv_vsub4(int __a, int __b);\n" |
| 3560 | "__device__ int __nv_vsubss2(int __a, int __b);\n" |
| 3561 | "__device__ int __nv_vsubss4(int __a, int __b);\n" |
| 3562 | "__device__ int __nv_vsubus2(int __a, int __b);\n" |
| 3563 | "__device__ int __nv_vsubus4(int __a, int __b);\n" |
| 3564 | "#endif // CUDA_VERSION\n" |
| 3565 | "__device__ double __nv_y0(double __a);\n" |
| 3566 | "__device__ float __nv_y0f(float __a);\n" |
| 3567 | "__device__ double __nv_y1(double __a);\n" |
| 3568 | "__device__ float __nv_y1f(float __a);\n" |
| 3569 | "__device__ float __nv_ynf(int __a, float __b);\n" |
| 3570 | "__device__ double __nv_yn(int __a, double __b);\n" |
| 3571 | "} // extern \"C\"\n" |
| 3572 | "#endif // __CLANG_CUDA_LIBDEVICE_DECLARES_H__\n" |
| 3573 | "" } , |
| 3574 | { "/builtins/__clang_cuda_math_forward_declares.h" , "/*===- __clang_math_forward_declares.h - Prototypes of __device__ math fns --===\n" |
| 3575 | " *\n" |
| 3576 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 3577 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 3578 | " * in the Software without restriction, including without limitation the rights\n" |
| 3579 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 3580 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 3581 | " * furnished to do so, subject to the following conditions:\n" |
| 3582 | " *\n" |
| 3583 | " * The above copyright notice and this permission notice shall be included in\n" |
| 3584 | " * all copies or substantial portions of the Software.\n" |
| 3585 | " *\n" |
| 3586 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 3587 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 3588 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 3589 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 3590 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 3591 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 3592 | " * THE SOFTWARE.\n" |
| 3593 | " *\n" |
| 3594 | " *===-----------------------------------------------------------------------===\n" |
| 3595 | " */\n" |
| 3596 | "#ifndef __CLANG__CUDA_MATH_FORWARD_DECLARES_H__\n" |
| 3597 | "#define __CLANG__CUDA_MATH_FORWARD_DECLARES_H__\n" |
| 3598 | "#ifndef __CUDA__\n" |
| 3599 | "#error \"This file is for CUDA compilation only.\"\n" |
| 3600 | "#endif\n" |
| 3601 | "\n" |
| 3602 | "// This file forward-declares of some math functions we (or the CUDA headers)\n" |
| 3603 | "// will define later. We need to do this, and do it before cmath is included,\n" |
| 3604 | "// because the standard library may have constexpr math functions. In the\n" |
| 3605 | "// absence of a prior __device__ decl, those constexpr functions may become\n" |
| 3606 | "// implicitly host+device. host+device functions can't be overloaded, so that\n" |
| 3607 | "// would preclude the use of our own __device__ overloads for these functions.\n" |
| 3608 | "\n" |
| 3609 | "#pragma push_macro(\"__DEVICE__\")\n" |
| 3610 | "#define __DEVICE__ \\\n" |
| 3611 | " static __inline__ __attribute__((always_inline)) __attribute__((device))\n" |
| 3612 | "\n" |
| 3613 | "__DEVICE__ double abs(double);\n" |
| 3614 | "__DEVICE__ float abs(float);\n" |
| 3615 | "__DEVICE__ int abs(int);\n" |
| 3616 | "__DEVICE__ long abs(long);\n" |
| 3617 | "__DEVICE__ long long abs(long long);\n" |
| 3618 | "__DEVICE__ double acos(double);\n" |
| 3619 | "__DEVICE__ float acos(float);\n" |
| 3620 | "__DEVICE__ double acosh(double);\n" |
| 3621 | "__DEVICE__ float acosh(float);\n" |
| 3622 | "__DEVICE__ double asin(double);\n" |
| 3623 | "__DEVICE__ float asin(float);\n" |
| 3624 | "__DEVICE__ double asinh(double);\n" |
| 3625 | "__DEVICE__ float asinh(float);\n" |
| 3626 | "__DEVICE__ double atan2(double, double);\n" |
| 3627 | "__DEVICE__ float atan2(float, float);\n" |
| 3628 | "__DEVICE__ double atan(double);\n" |
| 3629 | "__DEVICE__ float atan(float);\n" |
| 3630 | "__DEVICE__ double atanh(double);\n" |
| 3631 | "__DEVICE__ float atanh(float);\n" |
| 3632 | "__DEVICE__ double cbrt(double);\n" |
| 3633 | "__DEVICE__ float cbrt(float);\n" |
| 3634 | "__DEVICE__ double ceil(double);\n" |
| 3635 | "__DEVICE__ float ceil(float);\n" |
| 3636 | "__DEVICE__ double copysign(double, double);\n" |
| 3637 | "__DEVICE__ float copysign(float, float);\n" |
| 3638 | "__DEVICE__ double cos(double);\n" |
| 3639 | "__DEVICE__ float cos(float);\n" |
| 3640 | "__DEVICE__ double cosh(double);\n" |
| 3641 | "__DEVICE__ float cosh(float);\n" |
| 3642 | "__DEVICE__ double erfc(double);\n" |
| 3643 | "__DEVICE__ float erfc(float);\n" |
| 3644 | "__DEVICE__ double erf(double);\n" |
| 3645 | "__DEVICE__ float erf(float);\n" |
| 3646 | "__DEVICE__ double exp2(double);\n" |
| 3647 | "__DEVICE__ float exp2(float);\n" |
| 3648 | "__DEVICE__ double exp(double);\n" |
| 3649 | "__DEVICE__ float exp(float);\n" |
| 3650 | "__DEVICE__ double expm1(double);\n" |
| 3651 | "__DEVICE__ float expm1(float);\n" |
| 3652 | "__DEVICE__ double fabs(double);\n" |
| 3653 | "__DEVICE__ float fabs(float);\n" |
| 3654 | "__DEVICE__ double fdim(double, double);\n" |
| 3655 | "__DEVICE__ float fdim(float, float);\n" |
| 3656 | "__DEVICE__ double floor(double);\n" |
| 3657 | "__DEVICE__ float floor(float);\n" |
| 3658 | "__DEVICE__ double fma(double, double, double);\n" |
| 3659 | "__DEVICE__ float fma(float, float, float);\n" |
| 3660 | "__DEVICE__ double fmax(double, double);\n" |
| 3661 | "__DEVICE__ float fmax(float, float);\n" |
| 3662 | "__DEVICE__ double fmin(double, double);\n" |
| 3663 | "__DEVICE__ float fmin(float, float);\n" |
| 3664 | "__DEVICE__ double fmod(double, double);\n" |
| 3665 | "__DEVICE__ float fmod(float, float);\n" |
| 3666 | "__DEVICE__ int fpclassify(double);\n" |
| 3667 | "__DEVICE__ int fpclassify(float);\n" |
| 3668 | "__DEVICE__ double frexp(double, int *);\n" |
| 3669 | "__DEVICE__ float frexp(float, int *);\n" |
| 3670 | "__DEVICE__ double hypot(double, double);\n" |
| 3671 | "__DEVICE__ float hypot(float, float);\n" |
| 3672 | "__DEVICE__ int ilogb(double);\n" |
| 3673 | "__DEVICE__ int ilogb(float);\n" |
| 3674 | "__DEVICE__ bool isfinite(double);\n" |
| 3675 | "__DEVICE__ bool isfinite(float);\n" |
| 3676 | "__DEVICE__ bool isgreater(double, double);\n" |
| 3677 | "__DEVICE__ bool isgreaterequal(double, double);\n" |
| 3678 | "__DEVICE__ bool isgreaterequal(float, float);\n" |
| 3679 | "__DEVICE__ bool isgreater(float, float);\n" |
| 3680 | "__DEVICE__ bool isinf(double);\n" |
| 3681 | "__DEVICE__ bool isinf(float);\n" |
| 3682 | "__DEVICE__ bool isless(double, double);\n" |
| 3683 | "__DEVICE__ bool islessequal(double, double);\n" |
| 3684 | "__DEVICE__ bool islessequal(float, float);\n" |
| 3685 | "__DEVICE__ bool isless(float, float);\n" |
| 3686 | "__DEVICE__ bool islessgreater(double, double);\n" |
| 3687 | "__DEVICE__ bool islessgreater(float, float);\n" |
| 3688 | "__DEVICE__ bool isnan(double);\n" |
| 3689 | "__DEVICE__ bool isnan(float);\n" |
| 3690 | "__DEVICE__ bool isnormal(double);\n" |
| 3691 | "__DEVICE__ bool isnormal(float);\n" |
| 3692 | "__DEVICE__ bool isunordered(double, double);\n" |
| 3693 | "__DEVICE__ bool isunordered(float, float);\n" |
| 3694 | "__DEVICE__ long labs(long);\n" |
| 3695 | "__DEVICE__ double ldexp(double, int);\n" |
| 3696 | "__DEVICE__ float ldexp(float, int);\n" |
| 3697 | "__DEVICE__ double lgamma(double);\n" |
| 3698 | "__DEVICE__ float lgamma(float);\n" |
| 3699 | "__DEVICE__ long long llabs(long long);\n" |
| 3700 | "__DEVICE__ long long llrint(double);\n" |
| 3701 | "__DEVICE__ long long llrint(float);\n" |
| 3702 | "__DEVICE__ double log10(double);\n" |
| 3703 | "__DEVICE__ float log10(float);\n" |
| 3704 | "__DEVICE__ double log1p(double);\n" |
| 3705 | "__DEVICE__ float log1p(float);\n" |
| 3706 | "__DEVICE__ double log2(double);\n" |
| 3707 | "__DEVICE__ float log2(float);\n" |
| 3708 | "__DEVICE__ double logb(double);\n" |
| 3709 | "__DEVICE__ float logb(float);\n" |
| 3710 | "__DEVICE__ double log(double);\n" |
| 3711 | "__DEVICE__ float log(float);\n" |
| 3712 | "__DEVICE__ long lrint(double);\n" |
| 3713 | "__DEVICE__ long lrint(float);\n" |
| 3714 | "__DEVICE__ long lround(double);\n" |
| 3715 | "__DEVICE__ long lround(float);\n" |
| 3716 | "__DEVICE__ long long llround(float); // No llround(double).\n" |
| 3717 | "__DEVICE__ double modf(double, double *);\n" |
| 3718 | "__DEVICE__ float modf(float, float *);\n" |
| 3719 | "__DEVICE__ double nan(const char *);\n" |
| 3720 | "__DEVICE__ float nanf(const char *);\n" |
| 3721 | "__DEVICE__ double nearbyint(double);\n" |
| 3722 | "__DEVICE__ float nearbyint(float);\n" |
| 3723 | "__DEVICE__ double nextafter(double, double);\n" |
| 3724 | "__DEVICE__ float nextafter(float, float);\n" |
| 3725 | "__DEVICE__ double pow(double, double);\n" |
| 3726 | "__DEVICE__ double pow(double, int);\n" |
| 3727 | "__DEVICE__ float pow(float, float);\n" |
| 3728 | "__DEVICE__ float pow(float, int);\n" |
| 3729 | "__DEVICE__ double remainder(double, double);\n" |
| 3730 | "__DEVICE__ float remainder(float, float);\n" |
| 3731 | "__DEVICE__ double remquo(double, double, int *);\n" |
| 3732 | "__DEVICE__ float remquo(float, float, int *);\n" |
| 3733 | "__DEVICE__ double rint(double);\n" |
| 3734 | "__DEVICE__ float rint(float);\n" |
| 3735 | "__DEVICE__ double round(double);\n" |
| 3736 | "__DEVICE__ float round(float);\n" |
| 3737 | "__DEVICE__ double scalbln(double, long);\n" |
| 3738 | "__DEVICE__ float scalbln(float, long);\n" |
| 3739 | "__DEVICE__ double scalbn(double, int);\n" |
| 3740 | "__DEVICE__ float scalbn(float, int);\n" |
| 3741 | "__DEVICE__ bool signbit(double);\n" |
| 3742 | "__DEVICE__ bool signbit(float);\n" |
| 3743 | "__DEVICE__ double sin(double);\n" |
| 3744 | "__DEVICE__ float sin(float);\n" |
| 3745 | "__DEVICE__ double sinh(double);\n" |
| 3746 | "__DEVICE__ float sinh(float);\n" |
| 3747 | "__DEVICE__ double sqrt(double);\n" |
| 3748 | "__DEVICE__ float sqrt(float);\n" |
| 3749 | "__DEVICE__ double tan(double);\n" |
| 3750 | "__DEVICE__ float tan(float);\n" |
| 3751 | "__DEVICE__ double tanh(double);\n" |
| 3752 | "__DEVICE__ float tanh(float);\n" |
| 3753 | "__DEVICE__ double tgamma(double);\n" |
| 3754 | "__DEVICE__ float tgamma(float);\n" |
| 3755 | "__DEVICE__ double trunc(double);\n" |
| 3756 | "__DEVICE__ float trunc(float);\n" |
| 3757 | "\n" |
| 3758 | "// Notably missing above is nexttoward, which we don't define on\n" |
| 3759 | "// the device side because libdevice doesn't give us an implementation, and we\n" |
| 3760 | "// don't want to be in the business of writing one ourselves.\n" |
| 3761 | "\n" |
| 3762 | "// We need to define these overloads in exactly the namespace our standard\n" |
| 3763 | "// library uses (including the right inline namespace), otherwise they won't be\n" |
| 3764 | "// picked up by other functions in the standard library (e.g. functions in\n" |
| 3765 | "// <complex>). Thus the ugliness below.\n" |
| 3766 | "#ifdef _LIBCPP_BEGIN_NAMESPACE_STD\n" |
| 3767 | "_LIBCPP_BEGIN_NAMESPACE_STD\n" |
| 3768 | "#else\n" |
| 3769 | "namespace std {\n" |
| 3770 | "#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION\n" |
| 3771 | "_GLIBCXX_BEGIN_NAMESPACE_VERSION\n" |
| 3772 | "#endif\n" |
| 3773 | "#endif\n" |
| 3774 | "\n" |
| 3775 | "using ::abs;\n" |
| 3776 | "using ::acos;\n" |
| 3777 | "using ::acosh;\n" |
| 3778 | "using ::asin;\n" |
| 3779 | "using ::asinh;\n" |
| 3780 | "using ::atan;\n" |
| 3781 | "using ::atan2;\n" |
| 3782 | "using ::atanh;\n" |
| 3783 | "using ::cbrt;\n" |
| 3784 | "using ::ceil;\n" |
| 3785 | "using ::copysign;\n" |
| 3786 | "using ::cos;\n" |
| 3787 | "using ::cosh;\n" |
| 3788 | "using ::erf;\n" |
| 3789 | "using ::erfc;\n" |
| 3790 | "using ::exp;\n" |
| 3791 | "using ::exp2;\n" |
| 3792 | "using ::expm1;\n" |
| 3793 | "using ::fabs;\n" |
| 3794 | "using ::fdim;\n" |
| 3795 | "using ::floor;\n" |
| 3796 | "using ::fma;\n" |
| 3797 | "using ::fmax;\n" |
| 3798 | "using ::fmin;\n" |
| 3799 | "using ::fmod;\n" |
| 3800 | "using ::fpclassify;\n" |
| 3801 | "using ::frexp;\n" |
| 3802 | "using ::hypot;\n" |
| 3803 | "using ::ilogb;\n" |
| 3804 | "using ::isfinite;\n" |
| 3805 | "using ::isgreater;\n" |
| 3806 | "using ::isgreaterequal;\n" |
| 3807 | "using ::isinf;\n" |
| 3808 | "using ::isless;\n" |
| 3809 | "using ::islessequal;\n" |
| 3810 | "using ::islessgreater;\n" |
| 3811 | "using ::isnan;\n" |
| 3812 | "using ::isnormal;\n" |
| 3813 | "using ::isunordered;\n" |
| 3814 | "using ::labs;\n" |
| 3815 | "using ::ldexp;\n" |
| 3816 | "using ::lgamma;\n" |
| 3817 | "using ::llabs;\n" |
| 3818 | "using ::llrint;\n" |
| 3819 | "using ::log;\n" |
| 3820 | "using ::log10;\n" |
| 3821 | "using ::log1p;\n" |
| 3822 | "using ::log2;\n" |
| 3823 | "using ::logb;\n" |
| 3824 | "using ::lrint;\n" |
| 3825 | "using ::lround;\n" |
| 3826 | "using ::llround;\n" |
| 3827 | "using ::modf;\n" |
| 3828 | "using ::nan;\n" |
| 3829 | "using ::nanf;\n" |
| 3830 | "using ::nearbyint;\n" |
| 3831 | "using ::nextafter;\n" |
| 3832 | "using ::pow;\n" |
| 3833 | "using ::remainder;\n" |
| 3834 | "using ::remquo;\n" |
| 3835 | "using ::rint;\n" |
| 3836 | "using ::round;\n" |
| 3837 | "using ::scalbln;\n" |
| 3838 | "using ::scalbn;\n" |
| 3839 | "using ::signbit;\n" |
| 3840 | "using ::sin;\n" |
| 3841 | "using ::sinh;\n" |
| 3842 | "using ::sqrt;\n" |
| 3843 | "using ::tan;\n" |
| 3844 | "using ::tanh;\n" |
| 3845 | "using ::tgamma;\n" |
| 3846 | "using ::trunc;\n" |
| 3847 | "\n" |
| 3848 | "#ifdef _LIBCPP_END_NAMESPACE_STD\n" |
| 3849 | "_LIBCPP_END_NAMESPACE_STD\n" |
| 3850 | "#else\n" |
| 3851 | "#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION\n" |
| 3852 | "_GLIBCXX_END_NAMESPACE_VERSION\n" |
| 3853 | "#endif\n" |
| 3854 | "} // namespace std\n" |
| 3855 | "#endif\n" |
| 3856 | "\n" |
| 3857 | "#pragma pop_macro(\"__DEVICE__\")\n" |
| 3858 | "\n" |
| 3859 | "#endif\n" |
| 3860 | "" } , |
| 3861 | { "/builtins/__clang_cuda_runtime_wrapper.h" , "/*===---- __clang_cuda_runtime_wrapper.h - CUDA runtime support -------------===\n" |
| 3862 | " *\n" |
| 3863 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 3864 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 3865 | " * in the Software without restriction, including without limitation the rights\n" |
| 3866 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 3867 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 3868 | " * furnished to do so, subject to the following conditions:\n" |
| 3869 | " *\n" |
| 3870 | " * The above copyright notice and this permission notice shall be included in\n" |
| 3871 | " * all copies or substantial portions of the Software.\n" |
| 3872 | " *\n" |
| 3873 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 3874 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 3875 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 3876 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 3877 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 3878 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 3879 | " * THE SOFTWARE.\n" |
| 3880 | " *\n" |
| 3881 | " *===-----------------------------------------------------------------------===\n" |
| 3882 | " */\n" |
| 3883 | "\n" |
| 3884 | "/*\n" |
| 3885 | " * WARNING: This header is intended to be directly -include'd by\n" |
| 3886 | " * the compiler and is not supposed to be included by users.\n" |
| 3887 | " *\n" |
| 3888 | " * CUDA headers are implemented in a way that currently makes it\n" |
| 3889 | " * impossible for user code to #include directly when compiling with\n" |
| 3890 | " * Clang. They present different view of CUDA-supplied functions\n" |
| 3891 | " * depending on where in NVCC's compilation pipeline the headers are\n" |
| 3892 | " * included. Neither of these modes provides function definitions with\n" |
| 3893 | " * correct attributes, so we use preprocessor to force the headers\n" |
| 3894 | " * into a form that Clang can use.\n" |
| 3895 | " *\n" |
| 3896 | " * Similarly to NVCC which -include's cuda_runtime.h, Clang -include's\n" |
| 3897 | " * this file during every CUDA compilation.\n" |
| 3898 | " */\n" |
| 3899 | "\n" |
| 3900 | "#ifndef __CLANG_CUDA_RUNTIME_WRAPPER_H__\n" |
| 3901 | "#define __CLANG_CUDA_RUNTIME_WRAPPER_H__\n" |
| 3902 | "\n" |
| 3903 | "#if defined(__CUDA__) && defined(__clang__)\n" |
| 3904 | "\n" |
| 3905 | "// Include some forward declares that must come before cmath.\n" |
| 3906 | "#include <__clang_cuda_math_forward_declares.h>\n" |
| 3907 | "\n" |
| 3908 | "// Include some standard headers to avoid CUDA headers including them\n" |
| 3909 | "// while some required macros (like __THROW) are in a weird state.\n" |
| 3910 | "#include <cmath>\n" |
| 3911 | "#include <cstdlib>\n" |
| 3912 | "#include <stdlib.h>\n" |
| 3913 | "\n" |
| 3914 | "// Preserve common macros that will be changed below by us or by CUDA\n" |
| 3915 | "// headers.\n" |
| 3916 | "#pragma push_macro(\"__THROW\")\n" |
| 3917 | "#pragma push_macro(\"__CUDA_ARCH__\")\n" |
| 3918 | "\n" |
| 3919 | "// WARNING: Preprocessor hacks below are based on specific details of\n" |
| 3920 | "// CUDA-7.x headers and are not expected to work with any other\n" |
| 3921 | "// version of CUDA headers.\n" |
| 3922 | "#include \"cuda.h\"\n" |
| 3923 | "#if !defined(CUDA_VERSION)\n" |
| 3924 | "#error \"cuda.h did not define CUDA_VERSION\"\n" |
| 3925 | "#elif CUDA_VERSION < 7000 || CUDA_VERSION > 9020\n" |
| 3926 | "#error \"Unsupported CUDA version!\"\n" |
| 3927 | "#endif\n" |
| 3928 | "\n" |
| 3929 | "// Make largest subset of device functions available during host\n" |
| 3930 | "// compilation -- SM_35 for the time being.\n" |
| 3931 | "#ifndef __CUDA_ARCH__\n" |
| 3932 | "#define __CUDA_ARCH__ 350\n" |
| 3933 | "#endif\n" |
| 3934 | "\n" |
| 3935 | "#include \"__clang_cuda_builtin_vars.h\"\n" |
| 3936 | "\n" |
| 3937 | "// No need for device_launch_parameters.h as __clang_cuda_builtin_vars.h above\n" |
| 3938 | "// has taken care of builtin variables declared in the file.\n" |
| 3939 | "#define __DEVICE_LAUNCH_PARAMETERS_H__\n" |
| 3940 | "\n" |
| 3941 | "// {math,device}_functions.h only have declarations of the\n" |
| 3942 | "// functions. We don't need them as we're going to pull in their\n" |
| 3943 | "// definitions from .hpp files.\n" |
| 3944 | "#define __DEVICE_FUNCTIONS_H__\n" |
| 3945 | "#define __MATH_FUNCTIONS_H__\n" |
| 3946 | "#define __COMMON_FUNCTIONS_H__\n" |
| 3947 | "// device_functions_decls is replaced by __clang_cuda_device_functions.h\n" |
| 3948 | "// included below.\n" |
| 3949 | "#define __DEVICE_FUNCTIONS_DECLS_H__\n" |
| 3950 | "\n" |
| 3951 | "#undef __CUDACC__\n" |
| 3952 | "#if CUDA_VERSION < 9000\n" |
| 3953 | "#define __CUDABE__\n" |
| 3954 | "#else\n" |
| 3955 | "#define __CUDA_LIBDEVICE__\n" |
| 3956 | "#endif\n" |
| 3957 | "// Disables definitions of device-side runtime support stubs in\n" |
| 3958 | "// cuda_device_runtime_api.h\n" |
| 3959 | "#include \"driver_types.h\"\n" |
| 3960 | "#include \"host_config.h\"\n" |
| 3961 | "#include \"host_defines.h\"\n" |
| 3962 | "\n" |
| 3963 | "// Temporarily replace \"nv_weak\" with weak, so __attribute__((nv_weak)) in\n" |
| 3964 | "// cuda_device_runtime_api.h ends up being __attribute__((weak)) which is the\n" |
| 3965 | "// functional equivalent of what we need.\n" |
| 3966 | "#pragma push_macro(\"nv_weak\")\n" |
| 3967 | "#define nv_weak weak\n" |
| 3968 | "#undef __CUDABE__\n" |
| 3969 | "#undef __CUDA_LIBDEVICE__\n" |
| 3970 | "#define __CUDACC__\n" |
| 3971 | "#include \"cuda_runtime.h\"\n" |
| 3972 | "\n" |
| 3973 | "#pragma pop_macro(\"nv_weak\")\n" |
| 3974 | "#undef __CUDACC__\n" |
| 3975 | "#define __CUDABE__\n" |
| 3976 | "\n" |
| 3977 | "// CUDA headers use __nvvm_memcpy and __nvvm_memset which Clang does\n" |
| 3978 | "// not have at the moment. Emulate them with a builtin memcpy/memset.\n" |
| 3979 | "#define __nvvm_memcpy(s, d, n, a) __builtin_memcpy(s, d, n)\n" |
| 3980 | "#define __nvvm_memset(d, c, n, a) __builtin_memset(d, c, n)\n" |
| 3981 | "\n" |
| 3982 | "#if CUDA_VERSION < 9000\n" |
| 3983 | "#include \"crt/device_runtime.h\"\n" |
| 3984 | "#endif\n" |
| 3985 | "#include \"crt/host_runtime.h\"\n" |
| 3986 | "// device_runtime.h defines __cxa_* macros that will conflict with\n" |
| 3987 | "// cxxabi.h.\n" |
| 3988 | "// FIXME: redefine these as __device__ functions.\n" |
| 3989 | "#undef __cxa_vec_ctor\n" |
| 3990 | "#undef __cxa_vec_cctor\n" |
| 3991 | "#undef __cxa_vec_dtor\n" |
| 3992 | "#undef __cxa_vec_new\n" |
| 3993 | "#undef __cxa_vec_new2\n" |
| 3994 | "#undef __cxa_vec_new3\n" |
| 3995 | "#undef __cxa_vec_delete2\n" |
| 3996 | "#undef __cxa_vec_delete\n" |
| 3997 | "#undef __cxa_vec_delete3\n" |
| 3998 | "#undef __cxa_pure_virtual\n" |
| 3999 | "\n" |
| 4000 | "// math_functions.hpp expects this host function be defined on MacOS, but it\n" |
| 4001 | "// ends up not being there because of the games we play here. Just define it\n" |
| 4002 | "// ourselves; it's simple enough.\n" |
| 4003 | "#ifdef __APPLE__\n" |
| 4004 | "inline __host__ double __signbitd(double x) {\n" |
| 4005 | " return std::signbit(x);\n" |
| 4006 | "}\n" |
| 4007 | "#endif\n" |
| 4008 | "\n" |
| 4009 | "// CUDA 9.1 no longer provides declarations for libdevice functions, so we need\n" |
| 4010 | "// to provide our own.\n" |
| 4011 | "#include <__clang_cuda_libdevice_declares.h>\n" |
| 4012 | "\n" |
| 4013 | "// Wrappers for many device-side standard library functions became compiler\n" |
| 4014 | "// builtins in CUDA-9 and have been removed from the CUDA headers. Clang now\n" |
| 4015 | "// provides its own implementation of the wrappers.\n" |
| 4016 | "#if CUDA_VERSION >= 9000\n" |
| 4017 | "#include <__clang_cuda_device_functions.h>\n" |
| 4018 | "#endif\n" |
| 4019 | "\n" |
| 4020 | "// __THROW is redefined to be empty by device_functions_decls.h in CUDA. Clang's\n" |
| 4021 | "// counterpart does not do it, so we need to make it empty here to keep\n" |
| 4022 | "// following CUDA includes happy.\n" |
| 4023 | "#undef __THROW\n" |
| 4024 | "#define __THROW\n" |
| 4025 | "\n" |
| 4026 | "// CUDA 8.0.41 relies on __USE_FAST_MATH__ and __CUDA_PREC_DIV's values.\n" |
| 4027 | "// Previous versions used to check whether they are defined or not.\n" |
| 4028 | "// CU_DEVICE_INVALID macro is only defined in 8.0.41, so we use it\n" |
| 4029 | "// here to detect the switch.\n" |
| 4030 | "\n" |
| 4031 | "#if defined(CU_DEVICE_INVALID)\n" |
| 4032 | "#if !defined(__USE_FAST_MATH__)\n" |
| 4033 | "#define __USE_FAST_MATH__ 0\n" |
| 4034 | "#endif\n" |
| 4035 | "\n" |
| 4036 | "#if !defined(__CUDA_PREC_DIV)\n" |
| 4037 | "#define __CUDA_PREC_DIV 0\n" |
| 4038 | "#endif\n" |
| 4039 | "#endif\n" |
| 4040 | "\n" |
| 4041 | "// Temporarily poison __host__ macro to ensure it's not used by any of\n" |
| 4042 | "// the headers we're about to include.\n" |
| 4043 | "#pragma push_macro(\"__host__\")\n" |
| 4044 | "#define __host__ UNEXPECTED_HOST_ATTRIBUTE\n" |
| 4045 | "\n" |
| 4046 | "// device_functions.hpp and math_functions*.hpp use 'static\n" |
| 4047 | "// __forceinline__' (with no __device__) for definitions of device\n" |
| 4048 | "// functions. Temporarily redefine __forceinline__ to include\n" |
| 4049 | "// __device__.\n" |
| 4050 | "#pragma push_macro(\"__forceinline__\")\n" |
| 4051 | "#define __forceinline__ __device__ __inline__ __attribute__((always_inline))\n" |
| 4052 | "#if CUDA_VERSION < 9000\n" |
| 4053 | "#include \"device_functions.hpp\"\n" |
| 4054 | "#endif\n" |
| 4055 | "\n" |
| 4056 | "// math_function.hpp uses the __USE_FAST_MATH__ macro to determine whether we\n" |
| 4057 | "// get the slow-but-accurate or fast-but-inaccurate versions of functions like\n" |
| 4058 | "// sin and exp. This is controlled in clang by -fcuda-approx-transcendentals.\n" |
| 4059 | "//\n" |
| 4060 | "// device_functions.hpp uses __USE_FAST_MATH__ for a different purpose (fast vs.\n" |
| 4061 | "// slow divides), so we need to scope our define carefully here.\n" |
| 4062 | "#pragma push_macro(\"__USE_FAST_MATH__\")\n" |
| 4063 | "#if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)\n" |
| 4064 | "#define __USE_FAST_MATH__ 1\n" |
| 4065 | "#endif\n" |
| 4066 | "\n" |
| 4067 | "#if CUDA_VERSION >= 9000\n" |
| 4068 | "// CUDA-9.2 needs host-side memcpy for some host functions in\n" |
| 4069 | "// device_functions.hpp\n" |
| 4070 | "#if CUDA_VERSION >= 9020\n" |
| 4071 | "#include <string.h>\n" |
| 4072 | "#endif\n" |
| 4073 | "#include \"crt/math_functions.hpp\"\n" |
| 4074 | "#else\n" |
| 4075 | "#include \"math_functions.hpp\"\n" |
| 4076 | "#endif\n" |
| 4077 | "\n" |
| 4078 | "#pragma pop_macro(\"__USE_FAST_MATH__\")\n" |
| 4079 | "\n" |
| 4080 | "#if CUDA_VERSION < 9000\n" |
| 4081 | "#include \"math_functions_dbl_ptx3.hpp\"\n" |
| 4082 | "#endif\n" |
| 4083 | "#pragma pop_macro(\"__forceinline__\")\n" |
| 4084 | "\n" |
| 4085 | "// Pull in host-only functions that are only available when neither\n" |
| 4086 | "// __CUDACC__ nor __CUDABE__ are defined.\n" |
| 4087 | "#undef __MATH_FUNCTIONS_HPP__\n" |
| 4088 | "#undef __CUDABE__\n" |
| 4089 | "#if CUDA_VERSION < 9000\n" |
| 4090 | "#include \"math_functions.hpp\"\n" |
| 4091 | "#endif\n" |
| 4092 | "// Alas, additional overloads for these functions are hard to get to.\n" |
| 4093 | "// Considering that we only need these overloads for a few functions,\n" |
| 4094 | "// we can provide them here.\n" |
| 4095 | "static inline float rsqrt(float __a) { return rsqrtf(__a); }\n" |
| 4096 | "static inline float rcbrt(float __a) { return rcbrtf(__a); }\n" |
| 4097 | "static inline float sinpi(float __a) { return sinpif(__a); }\n" |
| 4098 | "static inline float cospi(float __a) { return cospif(__a); }\n" |
| 4099 | "static inline void sincospi(float __a, float *__b, float *__c) {\n" |
| 4100 | " return sincospif(__a, __b, __c);\n" |
| 4101 | "}\n" |
| 4102 | "static inline float erfcinv(float __a) { return erfcinvf(__a); }\n" |
| 4103 | "static inline float normcdfinv(float __a) { return normcdfinvf(__a); }\n" |
| 4104 | "static inline float normcdf(float __a) { return normcdff(__a); }\n" |
| 4105 | "static inline float erfcx(float __a) { return erfcxf(__a); }\n" |
| 4106 | "\n" |
| 4107 | "#if CUDA_VERSION < 9000\n" |
| 4108 | "// For some reason single-argument variant is not always declared by\n" |
| 4109 | "// CUDA headers. Alas, device_functions.hpp included below needs it.\n" |
| 4110 | "static inline __device__ void __brkpt(int __c) { __brkpt(); }\n" |
| 4111 | "#endif\n" |
| 4112 | "\n" |
| 4113 | "// Now include *.hpp with definitions of various GPU functions. Alas,\n" |
| 4114 | "// a lot of thins get declared/defined with __host__ attribute which\n" |
| 4115 | "// we don't want and we have to define it out. We also have to include\n" |
| 4116 | "// {device,math}_functions.hpp again in order to extract the other\n" |
| 4117 | "// branch of #if/else inside.\n" |
| 4118 | "#define __host__\n" |
| 4119 | "#undef __CUDABE__\n" |
| 4120 | "#define __CUDACC__\n" |
| 4121 | "#if CUDA_VERSION >= 9000\n" |
| 4122 | "// Some atomic functions became compiler builtins in CUDA-9 , so we need their\n" |
| 4123 | "// declarations.\n" |
| 4124 | "#include \"device_atomic_functions.h\"\n" |
| 4125 | "#endif\n" |
| 4126 | "#undef __DEVICE_FUNCTIONS_HPP__\n" |
| 4127 | "#include \"device_atomic_functions.hpp\"\n" |
| 4128 | "#if CUDA_VERSION >= 9000\n" |
| 4129 | "#include \"crt/device_functions.hpp\"\n" |
| 4130 | "#include \"crt/device_double_functions.hpp\"\n" |
| 4131 | "#else\n" |
| 4132 | "#include \"device_functions.hpp\"\n" |
| 4133 | "#define __CUDABE__\n" |
| 4134 | "#include \"device_double_functions.h\"\n" |
| 4135 | "#undef __CUDABE__\n" |
| 4136 | "#endif\n" |
| 4137 | "#include \"sm_20_atomic_functions.hpp\"\n" |
| 4138 | "#include \"sm_20_intrinsics.hpp\"\n" |
| 4139 | "#include \"sm_32_atomic_functions.hpp\"\n" |
| 4140 | "\n" |
| 4141 | "// Don't include sm_30_intrinsics.h and sm_32_intrinsics.h. These define the\n" |
| 4142 | "// __shfl and __ldg intrinsics using inline (volatile) asm, but we want to\n" |
| 4143 | "// define them using builtins so that the optimizer can reason about and across\n" |
| 4144 | "// these instructions. In particular, using intrinsics for ldg gets us the\n" |
| 4145 | "// [addr+imm] addressing mode, which, although it doesn't actually exist in the\n" |
| 4146 | "// hardware, seems to generate faster machine code because ptxas can more easily\n" |
| 4147 | "// reason about our code.\n" |
| 4148 | "\n" |
| 4149 | "#if CUDA_VERSION >= 8000\n" |
| 4150 | "#pragma push_macro(\"__CUDA_ARCH__\")\n" |
| 4151 | "#undef __CUDA_ARCH__\n" |
| 4152 | "#include \"sm_60_atomic_functions.hpp\"\n" |
| 4153 | "#include \"sm_61_intrinsics.hpp\"\n" |
| 4154 | "#pragma pop_macro(\"__CUDA_ARCH__\")\n" |
| 4155 | "#endif\n" |
| 4156 | "\n" |
| 4157 | "#undef __MATH_FUNCTIONS_HPP__\n" |
| 4158 | "\n" |
| 4159 | "// math_functions.hpp defines ::signbit as a __host__ __device__ function. This\n" |
| 4160 | "// conflicts with libstdc++'s constexpr ::signbit, so we have to rename\n" |
| 4161 | "// math_function.hpp's ::signbit. It's guarded by #undef signbit, but that's\n" |
| 4162 | "// conditional on __GNUC__. :)\n" |
| 4163 | "#pragma push_macro(\"signbit\")\n" |
| 4164 | "#pragma push_macro(\"__GNUC__\")\n" |
| 4165 | "#undef __GNUC__\n" |
| 4166 | "#define signbit __ignored_cuda_signbit\n" |
| 4167 | "\n" |
| 4168 | "// CUDA-9 omits device-side definitions of some math functions if it sees\n" |
| 4169 | "// include guard from math.h wrapper from libstdc++. We have to undo the header\n" |
| 4170 | "// guard temporarily to get the definitions we need.\n" |
| 4171 | "#pragma push_macro(\"_GLIBCXX_MATH_H\")\n" |
| 4172 | "#pragma push_macro(\"_LIBCPP_VERSION\")\n" |
| 4173 | "#if CUDA_VERSION >= 9000\n" |
| 4174 | "#undef _GLIBCXX_MATH_H\n" |
| 4175 | "// We also need to undo another guard that checks for libc++ 3.8+\n" |
| 4176 | "#ifdef _LIBCPP_VERSION\n" |
| 4177 | "#define _LIBCPP_VERSION 3700\n" |
| 4178 | "#endif\n" |
| 4179 | "#endif\n" |
| 4180 | "\n" |
| 4181 | "#if CUDA_VERSION >= 9000\n" |
| 4182 | "#include \"crt/math_functions.hpp\"\n" |
| 4183 | "#else\n" |
| 4184 | "#include \"math_functions.hpp\"\n" |
| 4185 | "#endif\n" |
| 4186 | "#pragma pop_macro(\"_GLIBCXX_MATH_H\")\n" |
| 4187 | "#pragma pop_macro(\"_LIBCPP_VERSION\")\n" |
| 4188 | "#pragma pop_macro(\"__GNUC__\")\n" |
| 4189 | "#pragma pop_macro(\"signbit\")\n" |
| 4190 | "\n" |
| 4191 | "#pragma pop_macro(\"__host__\")\n" |
| 4192 | "\n" |
| 4193 | "#include \"texture_indirect_functions.h\"\n" |
| 4194 | "\n" |
| 4195 | "// Restore state of __CUDA_ARCH__ and __THROW we had on entry.\n" |
| 4196 | "#pragma pop_macro(\"__CUDA_ARCH__\")\n" |
| 4197 | "#pragma pop_macro(\"__THROW\")\n" |
| 4198 | "\n" |
| 4199 | "// Set up compiler macros expected to be seen during compilation.\n" |
| 4200 | "#undef __CUDABE__\n" |
| 4201 | "#define __CUDACC__\n" |
| 4202 | "\n" |
| 4203 | "extern \"C\" {\n" |
| 4204 | "// Device-side CUDA system calls.\n" |
| 4205 | "// http://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls\n" |
| 4206 | "// We need these declarations and wrappers for device-side\n" |
| 4207 | "// malloc/free/printf calls to work without relying on\n" |
| 4208 | "// -fcuda-disable-target-call-checks option.\n" |
| 4209 | "__device__ int vprintf(const char *, const char *);\n" |
| 4210 | "__device__ void free(void *) __attribute((nothrow));\n" |
| 4211 | "__device__ void *malloc(size_t) __attribute((nothrow)) __attribute__((malloc));\n" |
| 4212 | "__device__ void __assertfail(const char *__message, const char *__file,\n" |
| 4213 | " unsigned __line, const char *__function,\n" |
| 4214 | " size_t __charSize) __attribute__((noreturn));\n" |
| 4215 | "\n" |
| 4216 | "// In order for standard assert() macro on linux to work we need to\n" |
| 4217 | "// provide device-side __assert_fail()\n" |
| 4218 | "__device__ static inline void __assert_fail(const char *__message,\n" |
| 4219 | " const char *__file, unsigned __line,\n" |
| 4220 | " const char *__function) {\n" |
| 4221 | " __assertfail(__message, __file, __line, __function, sizeof(char));\n" |
| 4222 | "}\n" |
| 4223 | "\n" |
| 4224 | "// Clang will convert printf into vprintf, but we still need\n" |
| 4225 | "// device-side declaration for it.\n" |
| 4226 | "__device__ int printf(const char *, ...);\n" |
| 4227 | "} // extern \"C\"\n" |
| 4228 | "\n" |
| 4229 | "// We also need device-side std::malloc and std::free.\n" |
| 4230 | "namespace std {\n" |
| 4231 | "__device__ static inline void free(void *__ptr) { ::free(__ptr); }\n" |
| 4232 | "__device__ static inline void *malloc(size_t __size) {\n" |
| 4233 | " return ::malloc(__size);\n" |
| 4234 | "}\n" |
| 4235 | "} // namespace std\n" |
| 4236 | "\n" |
| 4237 | "// Out-of-line implementations from __clang_cuda_builtin_vars.h. These need to\n" |
| 4238 | "// come after we've pulled in the definition of uint3 and dim3.\n" |
| 4239 | "\n" |
| 4240 | "__device__ inline __cuda_builtin_threadIdx_t::operator uint3() const {\n" |
| 4241 | " uint3 ret;\n" |
| 4242 | " ret.x = x;\n" |
| 4243 | " ret.y = y;\n" |
| 4244 | " ret.z = z;\n" |
| 4245 | " return ret;\n" |
| 4246 | "}\n" |
| 4247 | "\n" |
| 4248 | "__device__ inline __cuda_builtin_blockIdx_t::operator uint3() const {\n" |
| 4249 | " uint3 ret;\n" |
| 4250 | " ret.x = x;\n" |
| 4251 | " ret.y = y;\n" |
| 4252 | " ret.z = z;\n" |
| 4253 | " return ret;\n" |
| 4254 | "}\n" |
| 4255 | "\n" |
| 4256 | "__device__ inline __cuda_builtin_blockDim_t::operator dim3() const {\n" |
| 4257 | " return dim3(x, y, z);\n" |
| 4258 | "}\n" |
| 4259 | "\n" |
| 4260 | "__device__ inline __cuda_builtin_gridDim_t::operator dim3() const {\n" |
| 4261 | " return dim3(x, y, z);\n" |
| 4262 | "}\n" |
| 4263 | "\n" |
| 4264 | "#include <__clang_cuda_cmath.h>\n" |
| 4265 | "#include <__clang_cuda_intrinsics.h>\n" |
| 4266 | "#include <__clang_cuda_complex_builtins.h>\n" |
| 4267 | "\n" |
| 4268 | "// curand_mtgp32_kernel helpfully redeclares blockDim and threadIdx in host\n" |
| 4269 | "// mode, giving them their \"proper\" types of dim3 and uint3. This is\n" |
| 4270 | "// incompatible with the types we give in __clang_cuda_builtin_vars.h. As as\n" |
| 4271 | "// hack, force-include the header (nvcc doesn't include it by default) but\n" |
| 4272 | "// redefine dim3 and uint3 to our builtin types. (Thankfully dim3 and uint3 are\n" |
| 4273 | "// only used here for the redeclarations of blockDim and threadIdx.)\n" |
| 4274 | "#pragma push_macro(\"dim3\")\n" |
| 4275 | "#pragma push_macro(\"uint3\")\n" |
| 4276 | "#define dim3 __cuda_builtin_blockDim_t\n" |
| 4277 | "#define uint3 __cuda_builtin_threadIdx_t\n" |
| 4278 | "#include \"curand_mtgp32_kernel.h\"\n" |
| 4279 | "#pragma pop_macro(\"dim3\")\n" |
| 4280 | "#pragma pop_macro(\"uint3\")\n" |
| 4281 | "#pragma pop_macro(\"__USE_FAST_MATH__\")\n" |
| 4282 | "\n" |
| 4283 | "#endif // __CUDA__\n" |
| 4284 | "#endif // __CLANG_CUDA_RUNTIME_WRAPPER_H__\n" |
| 4285 | "" } , |
| 4286 | { "/builtins/__stddef_max_align_t.h" , "/*===---- __stddef_max_align_t.h - Definition of max_align_t for modules ---===\n" |
| 4287 | " *\n" |
| 4288 | " * Copyright (c) 2014 Chandler Carruth\n" |
| 4289 | " *\n" |
| 4290 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 4291 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 4292 | " * in the Software without restriction, including without limitation the rights\n" |
| 4293 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 4294 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 4295 | " * furnished to do so, subject to the following conditions:\n" |
| 4296 | " *\n" |
| 4297 | " * The above copyright notice and this permission notice shall be included in\n" |
| 4298 | " * all copies or substantial portions of the Software.\n" |
| 4299 | " *\n" |
| 4300 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 4301 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 4302 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 4303 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 4304 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 4305 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 4306 | " * THE SOFTWARE.\n" |
| 4307 | " *\n" |
| 4308 | " *===-----------------------------------------------------------------------===\n" |
| 4309 | " */\n" |
| 4310 | "\n" |
| 4311 | "#ifndef __CLANG_MAX_ALIGN_T_DEFINED\n" |
| 4312 | "#define __CLANG_MAX_ALIGN_T_DEFINED\n" |
| 4313 | "\n" |
| 4314 | "#if defined(_MSC_VER)\n" |
| 4315 | "typedef double max_align_t;\n" |
| 4316 | "#elif defined(__APPLE__)\n" |
| 4317 | "typedef long double max_align_t;\n" |
| 4318 | "#else\n" |
| 4319 | "// Define 'max_align_t' to match the GCC definition.\n" |
| 4320 | "typedef struct {\n" |
| 4321 | " long long __clang_max_align_nonce1\n" |
| 4322 | " __attribute__((__aligned__(__alignof__(long long))));\n" |
| 4323 | " long double __clang_max_align_nonce2\n" |
| 4324 | " __attribute__((__aligned__(__alignof__(long double))));\n" |
| 4325 | "} max_align_t;\n" |
| 4326 | "#endif\n" |
| 4327 | "\n" |
| 4328 | "#endif\n" |
| 4329 | "" } , |
| 4330 | { "/builtins/__wmmintrin_aes.h" , "/*===---- __wmmintrin_aes.h - AES intrinsics -------------------------------===\n" |
| 4331 | " *\n" |
| 4332 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 4333 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 4334 | " * in the Software without restriction, including without limitation the rights\n" |
| 4335 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 4336 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 4337 | " * furnished to do so, subject to the following conditions:\n" |
| 4338 | " *\n" |
| 4339 | " * The above copyright notice and this permission notice shall be included in\n" |
| 4340 | " * all copies or substantial portions of the Software.\n" |
| 4341 | " *\n" |
| 4342 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 4343 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 4344 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 4345 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 4346 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 4347 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 4348 | " * THE SOFTWARE.\n" |
| 4349 | " *\n" |
| 4350 | " *===-----------------------------------------------------------------------===\n" |
| 4351 | " */\n" |
| 4352 | "\n" |
| 4353 | "#ifndef __WMMINTRIN_H\n" |
| 4354 | "#error \"Never use <__wmmintrin_aes.h> directly; include <wmmintrin.h> instead.\"\n" |
| 4355 | "#endif\n" |
| 4356 | "\n" |
| 4357 | "#ifndef __WMMINTRIN_AES_H\n" |
| 4358 | "#define __WMMINTRIN_AES_H\n" |
| 4359 | "\n" |
| 4360 | "/* Define the default attributes for the functions in this file. */\n" |
| 4361 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"aes\"), __min_vector_width__(128)))\n" |
| 4362 | "\n" |
| 4363 | "/// Performs a single round of AES encryption using the Equivalent\n" |
| 4364 | "/// Inverse Cipher, transforming the state value from the first source\n" |
| 4365 | "/// operand using a 128-bit round key value contained in the second source\n" |
| 4366 | "/// operand, and writes the result to the destination.\n" |
| 4367 | "///\n" |
| 4368 | "/// \\headerfile <x86intrin.h>\n" |
| 4369 | "///\n" |
| 4370 | "/// This intrinsic corresponds to the <c> VAESENC </c> instruction.\n" |
| 4371 | "///\n" |
| 4372 | "/// \\param __V\n" |
| 4373 | "/// A 128-bit integer vector containing the state value.\n" |
| 4374 | "/// \\param __R\n" |
| 4375 | "/// A 128-bit integer vector containing the round key value.\n" |
| 4376 | "/// \\returns A 128-bit integer vector containing the encrypted value.\n" |
| 4377 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 4378 | "_mm_aesenc_si128(__m128i __V, __m128i __R)\n" |
| 4379 | "{\n" |
| 4380 | " return (__m128i)__builtin_ia32_aesenc128((__v2di)__V, (__v2di)__R);\n" |
| 4381 | "}\n" |
| 4382 | "\n" |
| 4383 | "/// Performs the final round of AES encryption using the Equivalent\n" |
| 4384 | "/// Inverse Cipher, transforming the state value from the first source\n" |
| 4385 | "/// operand using a 128-bit round key value contained in the second source\n" |
| 4386 | "/// operand, and writes the result to the destination.\n" |
| 4387 | "///\n" |
| 4388 | "/// \\headerfile <x86intrin.h>\n" |
| 4389 | "///\n" |
| 4390 | "/// This intrinsic corresponds to the <c> VAESENCLAST </c> instruction.\n" |
| 4391 | "///\n" |
| 4392 | "/// \\param __V\n" |
| 4393 | "/// A 128-bit integer vector containing the state value.\n" |
| 4394 | "/// \\param __R\n" |
| 4395 | "/// A 128-bit integer vector containing the round key value.\n" |
| 4396 | "/// \\returns A 128-bit integer vector containing the encrypted value.\n" |
| 4397 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 4398 | "_mm_aesenclast_si128(__m128i __V, __m128i __R)\n" |
| 4399 | "{\n" |
| 4400 | " return (__m128i)__builtin_ia32_aesenclast128((__v2di)__V, (__v2di)__R);\n" |
| 4401 | "}\n" |
| 4402 | "\n" |
| 4403 | "/// Performs a single round of AES decryption using the Equivalent\n" |
| 4404 | "/// Inverse Cipher, transforming the state value from the first source\n" |
| 4405 | "/// operand using a 128-bit round key value contained in the second source\n" |
| 4406 | "/// operand, and writes the result to the destination.\n" |
| 4407 | "///\n" |
| 4408 | "/// \\headerfile <x86intrin.h>\n" |
| 4409 | "///\n" |
| 4410 | "/// This intrinsic corresponds to the <c> VAESDEC </c> instruction.\n" |
| 4411 | "///\n" |
| 4412 | "/// \\param __V\n" |
| 4413 | "/// A 128-bit integer vector containing the state value.\n" |
| 4414 | "/// \\param __R\n" |
| 4415 | "/// A 128-bit integer vector containing the round key value.\n" |
| 4416 | "/// \\returns A 128-bit integer vector containing the decrypted value.\n" |
| 4417 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 4418 | "_mm_aesdec_si128(__m128i __V, __m128i __R)\n" |
| 4419 | "{\n" |
| 4420 | " return (__m128i)__builtin_ia32_aesdec128((__v2di)__V, (__v2di)__R);\n" |
| 4421 | "}\n" |
| 4422 | "\n" |
| 4423 | "/// Performs the final round of AES decryption using the Equivalent\n" |
| 4424 | "/// Inverse Cipher, transforming the state value from the first source\n" |
| 4425 | "/// operand using a 128-bit round key value contained in the second source\n" |
| 4426 | "/// operand, and writes the result to the destination.\n" |
| 4427 | "///\n" |
| 4428 | "/// \\headerfile <x86intrin.h>\n" |
| 4429 | "///\n" |
| 4430 | "/// This intrinsic corresponds to the <c> VAESDECLAST </c> instruction.\n" |
| 4431 | "///\n" |
| 4432 | "/// \\param __V\n" |
| 4433 | "/// A 128-bit integer vector containing the state value.\n" |
| 4434 | "/// \\param __R\n" |
| 4435 | "/// A 128-bit integer vector containing the round key value.\n" |
| 4436 | "/// \\returns A 128-bit integer vector containing the decrypted value.\n" |
| 4437 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 4438 | "_mm_aesdeclast_si128(__m128i __V, __m128i __R)\n" |
| 4439 | "{\n" |
| 4440 | " return (__m128i)__builtin_ia32_aesdeclast128((__v2di)__V, (__v2di)__R);\n" |
| 4441 | "}\n" |
| 4442 | "\n" |
| 4443 | "/// Applies the AES InvMixColumns() transformation to an expanded key\n" |
| 4444 | "/// contained in the source operand, and writes the result to the\n" |
| 4445 | "/// destination.\n" |
| 4446 | "///\n" |
| 4447 | "/// \\headerfile <x86intrin.h>\n" |
| 4448 | "///\n" |
| 4449 | "/// This intrinsic corresponds to the <c> VAESIMC </c> instruction.\n" |
| 4450 | "///\n" |
| 4451 | "/// \\param __V\n" |
| 4452 | "/// A 128-bit integer vector containing the expanded key.\n" |
| 4453 | "/// \\returns A 128-bit integer vector containing the transformed value.\n" |
| 4454 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 4455 | "_mm_aesimc_si128(__m128i __V)\n" |
| 4456 | "{\n" |
| 4457 | " return (__m128i)__builtin_ia32_aesimc128((__v2di)__V);\n" |
| 4458 | "}\n" |
| 4459 | "\n" |
| 4460 | "/// Generates a round key for AES encryption, operating on 128-bit data\n" |
| 4461 | "/// specified in the first source operand and using an 8-bit round constant\n" |
| 4462 | "/// specified by the second source operand, and writes the result to the\n" |
| 4463 | "/// destination.\n" |
| 4464 | "///\n" |
| 4465 | "/// \\headerfile <x86intrin.h>\n" |
| 4466 | "///\n" |
| 4467 | "/// \\code\n" |
| 4468 | "/// __m128i _mm_aeskeygenassist_si128(__m128i C, const int R);\n" |
| 4469 | "/// \\endcode\n" |
| 4470 | "///\n" |
| 4471 | "/// This intrinsic corresponds to the <c> AESKEYGENASSIST </c> instruction.\n" |
| 4472 | "///\n" |
| 4473 | "/// \\param C\n" |
| 4474 | "/// A 128-bit integer vector that is used to generate the AES encryption key.\n" |
| 4475 | "/// \\param R\n" |
| 4476 | "/// An 8-bit round constant used to generate the AES encryption key.\n" |
| 4477 | "/// \\returns A 128-bit round key for AES encryption.\n" |
| 4478 | "#define _mm_aeskeygenassist_si128(C, R) \\\n" |
| 4479 | " (__m128i)__builtin_ia32_aeskeygenassist128((__v2di)(__m128i)(C), (int)(R))\n" |
| 4480 | "\n" |
| 4481 | "#undef __DEFAULT_FN_ATTRS\n" |
| 4482 | "\n" |
| 4483 | "#endif /* __WMMINTRIN_AES_H */\n" |
| 4484 | "" } , |
| 4485 | { "/builtins/__wmmintrin_pclmul.h" , "/*===---- __wmmintrin_pclmul.h - PCMUL intrinsics ---------------------------===\n" |
| 4486 | " *\n" |
| 4487 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 4488 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 4489 | " * in the Software without restriction, including without limitation the rights\n" |
| 4490 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 4491 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 4492 | " * furnished to do so, subject to the following conditions:\n" |
| 4493 | " *\n" |
| 4494 | " * The above copyright notice and this permission notice shall be included in\n" |
| 4495 | " * all copies or substantial portions of the Software.\n" |
| 4496 | " *\n" |
| 4497 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 4498 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 4499 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 4500 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 4501 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 4502 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 4503 | " * THE SOFTWARE.\n" |
| 4504 | " *\n" |
| 4505 | " *===-----------------------------------------------------------------------===\n" |
| 4506 | " */\n" |
| 4507 | "\n" |
| 4508 | "#ifndef __WMMINTRIN_H\n" |
| 4509 | "#error \"Never use <__wmmintrin_pclmul.h> directly; include <wmmintrin.h> instead.\"\n" |
| 4510 | "#endif\n" |
| 4511 | "\n" |
| 4512 | "#ifndef __WMMINTRIN_PCLMUL_H\n" |
| 4513 | "#define __WMMINTRIN_PCLMUL_H\n" |
| 4514 | "\n" |
| 4515 | "/// Multiplies two 64-bit integer values, which are selected from source\n" |
| 4516 | "/// operands using the immediate-value operand. The multiplication is a\n" |
| 4517 | "/// carry-less multiplication, and the 128-bit integer product is stored in\n" |
| 4518 | "/// the destination.\n" |
| 4519 | "///\n" |
| 4520 | "/// \\headerfile <x86intrin.h>\n" |
| 4521 | "///\n" |
| 4522 | "/// \\code\n" |
| 4523 | "/// __m128i _mm_clmulepi64_si128(__m128i __X, __m128i __Y, const int __I);\n" |
| 4524 | "/// \\endcode\n" |
| 4525 | "///\n" |
| 4526 | "/// This intrinsic corresponds to the <c> VPCLMULQDQ </c> instruction.\n" |
| 4527 | "///\n" |
| 4528 | "/// \\param __X\n" |
| 4529 | "/// A 128-bit vector of [2 x i64] containing one of the source operands.\n" |
| 4530 | "/// \\param __Y\n" |
| 4531 | "/// A 128-bit vector of [2 x i64] containing one of the source operands.\n" |
| 4532 | "/// \\param __I\n" |
| 4533 | "/// An immediate value specifying which 64-bit values to select from the\n" |
| 4534 | "/// operands. Bit 0 is used to select a value from operand \\a __X, and bit\n" |
| 4535 | "/// 4 is used to select a value from operand \\a __Y: \\n\n" |
| 4536 | "/// Bit[0]=0 indicates that bits[63:0] of operand \\a __X are used. \\n\n" |
| 4537 | "/// Bit[0]=1 indicates that bits[127:64] of operand \\a __X are used. \\n\n" |
| 4538 | "/// Bit[4]=0 indicates that bits[63:0] of operand \\a __Y are used. \\n\n" |
| 4539 | "/// Bit[4]=1 indicates that bits[127:64] of operand \\a __Y are used.\n" |
| 4540 | "/// \\returns The 128-bit integer vector containing the result of the carry-less\n" |
| 4541 | "/// multiplication of the selected 64-bit values.\n" |
| 4542 | "#define _mm_clmulepi64_si128(X, Y, I) \\\n" |
| 4543 | " ((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(X), \\\n" |
| 4544 | " (__v2di)(__m128i)(Y), (char)(I)))\n" |
| 4545 | "\n" |
| 4546 | "#endif /* __WMMINTRIN_PCLMUL_H */\n" |
| 4547 | "" } , |
| 4548 | { "/builtins/adxintrin.h" , "/*===---- adxintrin.h - ADX intrinsics -------------------------------------===\n" |
| 4549 | " *\n" |
| 4550 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 4551 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 4552 | " * in the Software without restriction, including without limitation the rights\n" |
| 4553 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 4554 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 4555 | " * furnished to do so, subject to the following conditions:\n" |
| 4556 | " *\n" |
| 4557 | " * The above copyright notice and this permission notice shall be included in\n" |
| 4558 | " * all copies or substantial portions of the Software.\n" |
| 4559 | " *\n" |
| 4560 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 4561 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 4562 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 4563 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 4564 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 4565 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 4566 | " * THE SOFTWARE.\n" |
| 4567 | " *\n" |
| 4568 | " *===-----------------------------------------------------------------------===\n" |
| 4569 | " */\n" |
| 4570 | "\n" |
| 4571 | "#ifndef __IMMINTRIN_H\n" |
| 4572 | "#error \"Never use <adxintrin.h> directly; include <immintrin.h> instead.\"\n" |
| 4573 | "#endif\n" |
| 4574 | "\n" |
| 4575 | "#ifndef __ADXINTRIN_H\n" |
| 4576 | "#define __ADXINTRIN_H\n" |
| 4577 | "\n" |
| 4578 | "/* Define the default attributes for the functions in this file. */\n" |
| 4579 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))\n" |
| 4580 | "\n" |
| 4581 | "/* Intrinsics that are available only if __ADX__ defined */\n" |
| 4582 | "static __inline unsigned char __attribute__((__always_inline__, __nodebug__, __target__(\"adx\")))\n" |
| 4583 | "_addcarryx_u32(unsigned char __cf, unsigned int __x, unsigned int __y,\n" |
| 4584 | " unsigned int *__p)\n" |
| 4585 | "{\n" |
| 4586 | " return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p);\n" |
| 4587 | "}\n" |
| 4588 | "\n" |
| 4589 | "#ifdef __x86_64__\n" |
| 4590 | "static __inline unsigned char __attribute__((__always_inline__, __nodebug__, __target__(\"adx\")))\n" |
| 4591 | "_addcarryx_u64(unsigned char __cf, unsigned long long __x,\n" |
| 4592 | " unsigned long long __y, unsigned long long *__p)\n" |
| 4593 | "{\n" |
| 4594 | " return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);\n" |
| 4595 | "}\n" |
| 4596 | "#endif\n" |
| 4597 | "\n" |
| 4598 | "/* Intrinsics that are also available if __ADX__ undefined */\n" |
| 4599 | "static __inline unsigned char __DEFAULT_FN_ATTRS\n" |
| 4600 | "_addcarry_u32(unsigned char __cf, unsigned int __x, unsigned int __y,\n" |
| 4601 | " unsigned int *__p)\n" |
| 4602 | "{\n" |
| 4603 | " return __builtin_ia32_addcarry_u32(__cf, __x, __y, __p);\n" |
| 4604 | "}\n" |
| 4605 | "\n" |
| 4606 | "#ifdef __x86_64__\n" |
| 4607 | "static __inline unsigned char __DEFAULT_FN_ATTRS\n" |
| 4608 | "_addcarry_u64(unsigned char __cf, unsigned long long __x,\n" |
| 4609 | " unsigned long long __y, unsigned long long *__p)\n" |
| 4610 | "{\n" |
| 4611 | " return __builtin_ia32_addcarry_u64(__cf, __x, __y, __p);\n" |
| 4612 | "}\n" |
| 4613 | "#endif\n" |
| 4614 | "\n" |
| 4615 | "static __inline unsigned char __DEFAULT_FN_ATTRS\n" |
| 4616 | "_subborrow_u32(unsigned char __cf, unsigned int __x, unsigned int __y,\n" |
| 4617 | " unsigned int *__p)\n" |
| 4618 | "{\n" |
| 4619 | " return __builtin_ia32_subborrow_u32(__cf, __x, __y, __p);\n" |
| 4620 | "}\n" |
| 4621 | "\n" |
| 4622 | "#ifdef __x86_64__\n" |
| 4623 | "static __inline unsigned char __DEFAULT_FN_ATTRS\n" |
| 4624 | "_subborrow_u64(unsigned char __cf, unsigned long long __x,\n" |
| 4625 | " unsigned long long __y, unsigned long long *__p)\n" |
| 4626 | "{\n" |
| 4627 | " return __builtin_ia32_subborrow_u64(__cf, __x, __y, __p);\n" |
| 4628 | "}\n" |
| 4629 | "#endif\n" |
| 4630 | "\n" |
| 4631 | "#undef __DEFAULT_FN_ATTRS\n" |
| 4632 | "\n" |
| 4633 | "#endif /* __ADXINTRIN_H */\n" |
| 4634 | "" } , |
| 4635 | { "/builtins/ammintrin.h" , "/*===---- ammintrin.h - SSE4a intrinsics -----------------------------------===\n" |
| 4636 | " *\n" |
| 4637 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 4638 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 4639 | " * in the Software without restriction, including without limitation the rights\n" |
| 4640 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 4641 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 4642 | " * furnished to do so, subject to the following conditions:\n" |
| 4643 | " *\n" |
| 4644 | " * The above copyright notice and this permission notice shall be included in\n" |
| 4645 | " * all copies or substantial portions of the Software.\n" |
| 4646 | " *\n" |
| 4647 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 4648 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 4649 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 4650 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 4651 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 4652 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 4653 | " * THE SOFTWARE.\n" |
| 4654 | " *\n" |
| 4655 | " *===-----------------------------------------------------------------------===\n" |
| 4656 | " */\n" |
| 4657 | "\n" |
| 4658 | "#ifndef __AMMINTRIN_H\n" |
| 4659 | "#define __AMMINTRIN_H\n" |
| 4660 | "\n" |
| 4661 | "#include <pmmintrin.h>\n" |
| 4662 | "\n" |
| 4663 | "/* Define the default attributes for the functions in this file. */\n" |
| 4664 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"sse4a\"), __min_vector_width__(128)))\n" |
| 4665 | "\n" |
| 4666 | "/// Extracts the specified bits from the lower 64 bits of the 128-bit\n" |
| 4667 | "/// integer vector operand at the index \\a idx and of the length \\a len.\n" |
| 4668 | "///\n" |
| 4669 | "/// \\headerfile <x86intrin.h>\n" |
| 4670 | "///\n" |
| 4671 | "/// \\code\n" |
| 4672 | "/// __m128i _mm_extracti_si64(__m128i x, const int len, const int idx);\n" |
| 4673 | "/// \\endcode\n" |
| 4674 | "///\n" |
| 4675 | "/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.\n" |
| 4676 | "///\n" |
| 4677 | "/// \\param x\n" |
| 4678 | "/// The value from which bits are extracted.\n" |
| 4679 | "/// \\param len\n" |
| 4680 | "/// Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]\n" |
| 4681 | "/// are zero, the length is interpreted as 64.\n" |
| 4682 | "/// \\param idx\n" |
| 4683 | "/// Bits [5:0] specify the index of the least significant bit; the other\n" |
| 4684 | "/// bits are ignored. If the sum of the index and length is greater than 64,\n" |
| 4685 | "/// the result is undefined. If the length and index are both zero, bits\n" |
| 4686 | "/// [63:0] of parameter \\a x are extracted. If the length is zero but the\n" |
| 4687 | "/// index is non-zero, the result is undefined.\n" |
| 4688 | "/// \\returns A 128-bit integer vector whose lower 64 bits contain the bits\n" |
| 4689 | "/// extracted from the source operand.\n" |
| 4690 | "#define _mm_extracti_si64(x, len, idx) \\\n" |
| 4691 | " ((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), \\\n" |
| 4692 | " (char)(len), (char)(idx)))\n" |
| 4693 | "\n" |
| 4694 | "/// Extracts the specified bits from the lower 64 bits of the 128-bit\n" |
| 4695 | "/// integer vector operand at the index and of the length specified by\n" |
| 4696 | "/// \\a __y.\n" |
| 4697 | "///\n" |
| 4698 | "/// \\headerfile <x86intrin.h>\n" |
| 4699 | "///\n" |
| 4700 | "/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.\n" |
| 4701 | "///\n" |
| 4702 | "/// \\param __x\n" |
| 4703 | "/// The value from which bits are extracted.\n" |
| 4704 | "/// \\param __y\n" |
| 4705 | "/// Specifies the index of the least significant bit at [13:8] and the\n" |
| 4706 | "/// length at [5:0]; all other bits are ignored. If bits [5:0] are zero, the\n" |
| 4707 | "/// length is interpreted as 64. If the sum of the index and length is\n" |
| 4708 | "/// greater than 64, the result is undefined. If the length and index are\n" |
| 4709 | "/// both zero, bits [63:0] of parameter \\a __x are extracted. If the length\n" |
| 4710 | "/// is zero but the index is non-zero, the result is undefined.\n" |
| 4711 | "/// \\returns A 128-bit vector whose lower 64 bits contain the bits extracted\n" |
| 4712 | "/// from the source operand.\n" |
| 4713 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 4714 | "_mm_extract_si64(__m128i __x, __m128i __y)\n" |
| 4715 | "{\n" |
| 4716 | " return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y);\n" |
| 4717 | "}\n" |
| 4718 | "\n" |
| 4719 | "/// Inserts bits of a specified length from the source integer vector\n" |
| 4720 | "/// \\a y into the lower 64 bits of the destination integer vector \\a x at\n" |
| 4721 | "/// the index \\a idx and of the length \\a len.\n" |
| 4722 | "///\n" |
| 4723 | "/// \\headerfile <x86intrin.h>\n" |
| 4724 | "///\n" |
| 4725 | "/// \\code\n" |
| 4726 | "/// __m128i _mm_inserti_si64(__m128i x, __m128i y, const int len,\n" |
| 4727 | "/// const int idx);\n" |
| 4728 | "/// \\endcode\n" |
| 4729 | "///\n" |
| 4730 | "/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.\n" |
| 4731 | "///\n" |
| 4732 | "/// \\param x\n" |
| 4733 | "/// The destination operand where bits will be inserted. The inserted bits\n" |
| 4734 | "/// are defined by the length \\a len and by the index \\a idx specifying the\n" |
| 4735 | "/// least significant bit.\n" |
| 4736 | "/// \\param y\n" |
| 4737 | "/// The source operand containing the bits to be extracted. The extracted\n" |
| 4738 | "/// bits are the least significant bits of operand \\a y of length \\a len.\n" |
| 4739 | "/// \\param len\n" |
| 4740 | "/// Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]\n" |
| 4741 | "/// are zero, the length is interpreted as 64.\n" |
| 4742 | "/// \\param idx\n" |
| 4743 | "/// Bits [5:0] specify the index of the least significant bit; the other\n" |
| 4744 | "/// bits are ignored. If the sum of the index and length is greater than 64,\n" |
| 4745 | "/// the result is undefined. If the length and index are both zero, bits\n" |
| 4746 | "/// [63:0] of parameter \\a y are inserted into parameter \\a x. If the length\n" |
| 4747 | "/// is zero but the index is non-zero, the result is undefined.\n" |
| 4748 | "/// \\returns A 128-bit integer vector containing the original lower 64-bits of\n" |
| 4749 | "/// destination operand \\a x with the specified bitfields replaced by the\n" |
| 4750 | "/// lower bits of source operand \\a y. The upper 64 bits of the return value\n" |
| 4751 | "/// are undefined.\n" |
| 4752 | "#define _mm_inserti_si64(x, y, len, idx) \\\n" |
| 4753 | " ((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x), \\\n" |
| 4754 | " (__v2di)(__m128i)(y), \\\n" |
| 4755 | " (char)(len), (char)(idx)))\n" |
| 4756 | "\n" |
| 4757 | "/// Inserts bits of a specified length from the source integer vector\n" |
| 4758 | "/// \\a __y into the lower 64 bits of the destination integer vector \\a __x\n" |
| 4759 | "/// at the index and of the length specified by \\a __y.\n" |
| 4760 | "///\n" |
| 4761 | "/// \\headerfile <x86intrin.h>\n" |
| 4762 | "///\n" |
| 4763 | "/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.\n" |
| 4764 | "///\n" |
| 4765 | "/// \\param __x\n" |
| 4766 | "/// The destination operand where bits will be inserted. The inserted bits\n" |
| 4767 | "/// are defined by the length and by the index of the least significant bit\n" |
| 4768 | "/// specified by operand \\a __y.\n" |
| 4769 | "/// \\param __y\n" |
| 4770 | "/// The source operand containing the bits to be extracted. The extracted\n" |
| 4771 | "/// bits are the least significant bits of operand \\a __y with length\n" |
| 4772 | "/// specified by bits [69:64]. These are inserted into the destination at the\n" |
| 4773 | "/// index specified by bits [77:72]; all other bits are ignored. If bits\n" |
| 4774 | "/// [69:64] are zero, the length is interpreted as 64. If the sum of the\n" |
| 4775 | "/// index and length is greater than 64, the result is undefined. If the\n" |
| 4776 | "/// length and index are both zero, bits [63:0] of parameter \\a __y are\n" |
| 4777 | "/// inserted into parameter \\a __x. If the length is zero but the index is\n" |
| 4778 | "/// non-zero, the result is undefined.\n" |
| 4779 | "/// \\returns A 128-bit integer vector containing the original lower 64-bits of\n" |
| 4780 | "/// destination operand \\a __x with the specified bitfields replaced by the\n" |
| 4781 | "/// lower bits of source operand \\a __y. The upper 64 bits of the return\n" |
| 4782 | "/// value are undefined.\n" |
| 4783 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 4784 | "_mm_insert_si64(__m128i __x, __m128i __y)\n" |
| 4785 | "{\n" |
| 4786 | " return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y);\n" |
| 4787 | "}\n" |
| 4788 | "\n" |
| 4789 | "/// Stores a 64-bit double-precision value in a 64-bit memory location.\n" |
| 4790 | "/// To minimize caching, the data is flagged as non-temporal (unlikely to be\n" |
| 4791 | "/// used again soon).\n" |
| 4792 | "///\n" |
| 4793 | "/// \\headerfile <x86intrin.h>\n" |
| 4794 | "///\n" |
| 4795 | "/// This intrinsic corresponds to the <c> MOVNTSD </c> instruction.\n" |
| 4796 | "///\n" |
| 4797 | "/// \\param __p\n" |
| 4798 | "/// The 64-bit memory location used to store the register value.\n" |
| 4799 | "/// \\param __a\n" |
| 4800 | "/// The 64-bit double-precision floating-point register value to be stored.\n" |
| 4801 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 4802 | "_mm_stream_sd(double *__p, __m128d __a)\n" |
| 4803 | "{\n" |
| 4804 | " __builtin_ia32_movntsd(__p, (__v2df)__a);\n" |
| 4805 | "}\n" |
| 4806 | "\n" |
| 4807 | "/// Stores a 32-bit single-precision floating-point value in a 32-bit\n" |
| 4808 | "/// memory location. To minimize caching, the data is flagged as\n" |
| 4809 | "/// non-temporal (unlikely to be used again soon).\n" |
| 4810 | "///\n" |
| 4811 | "/// \\headerfile <x86intrin.h>\n" |
| 4812 | "///\n" |
| 4813 | "/// This intrinsic corresponds to the <c> MOVNTSS </c> instruction.\n" |
| 4814 | "///\n" |
| 4815 | "/// \\param __p\n" |
| 4816 | "/// The 32-bit memory location used to store the register value.\n" |
| 4817 | "/// \\param __a\n" |
| 4818 | "/// The 32-bit single-precision floating-point register value to be stored.\n" |
| 4819 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 4820 | "_mm_stream_ss(float *__p, __m128 __a)\n" |
| 4821 | "{\n" |
| 4822 | " __builtin_ia32_movntss(__p, (__v4sf)__a);\n" |
| 4823 | "}\n" |
| 4824 | "\n" |
| 4825 | "#undef __DEFAULT_FN_ATTRS\n" |
| 4826 | "\n" |
| 4827 | "#endif /* __AMMINTRIN_H */\n" |
| 4828 | "" } , |
| 4829 | { "/builtins/arm64intr.h" , "/*===---- arm64intr.h - ARM64 Windows intrinsics -------------------------------===\n" |
| 4830 | " *\n" |
| 4831 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 4832 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 4833 | " * in the Software without restriction, including without limitation the rights\n" |
| 4834 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 4835 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 4836 | " * furnished to do so, subject to the following conditions:\n" |
| 4837 | " *\n" |
| 4838 | " * The above copyright notice and this permission notice shall be included in\n" |
| 4839 | " * all copies or substantial portions of the Software.\n" |
| 4840 | " *\n" |
| 4841 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 4842 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 4843 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 4844 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 4845 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 4846 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 4847 | " * THE SOFTWARE.\n" |
| 4848 | " *\n" |
| 4849 | " *===-----------------------------------------------------------------------===\n" |
| 4850 | " */\n" |
| 4851 | "\n" |
| 4852 | "/* Only include this if we're compiling for the windows platform. */\n" |
| 4853 | "#ifndef _MSC_VER\n" |
| 4854 | "#include_next <arm64intr.h>\n" |
| 4855 | "#else\n" |
| 4856 | "\n" |
| 4857 | "#ifndef __ARM64INTR_H\n" |
| 4858 | "#define __ARM64INTR_H\n" |
| 4859 | "\n" |
| 4860 | "typedef enum\n" |
| 4861 | "{\n" |
| 4862 | " _ARM64_BARRIER_SY = 0xF,\n" |
| 4863 | " _ARM64_BARRIER_ST = 0xE,\n" |
| 4864 | " _ARM64_BARRIER_LD = 0xD,\n" |
| 4865 | " _ARM64_BARRIER_ISH = 0xB,\n" |
| 4866 | " _ARM64_BARRIER_ISHST = 0xA,\n" |
| 4867 | " _ARM64_BARRIER_ISHLD = 0x9,\n" |
| 4868 | " _ARM64_BARRIER_NSH = 0x7,\n" |
| 4869 | " _ARM64_BARRIER_NSHST = 0x6,\n" |
| 4870 | " _ARM64_BARRIER_NSHLD = 0x5,\n" |
| 4871 | " _ARM64_BARRIER_OSH = 0x3,\n" |
| 4872 | " _ARM64_BARRIER_OSHST = 0x2,\n" |
| 4873 | " _ARM64_BARRIER_OSHLD = 0x1\n" |
| 4874 | "} _ARM64INTR_BARRIER_TYPE;\n" |
| 4875 | "\n" |
| 4876 | "#endif /* __ARM64INTR_H */\n" |
| 4877 | "#endif /* _MSC_VER */\n" |
| 4878 | "" } , |
| 4879 | { "/builtins/arm_acle.h" , "/*===---- arm_acle.h - ARM Non-Neon intrinsics -----------------------------===\n" |
| 4880 | " *\n" |
| 4881 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 4882 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 4883 | " * in the Software without restriction, including without limitation the rights\n" |
| 4884 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 4885 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 4886 | " * furnished to do so, subject to the following conditions:\n" |
| 4887 | " *\n" |
| 4888 | " * The above copyright notice and this permission notice shall be included in\n" |
| 4889 | " * all copies or substantial portions of the Software.\n" |
| 4890 | " *\n" |
| 4891 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 4892 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 4893 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 4894 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 4895 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 4896 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 4897 | " * THE SOFTWARE.\n" |
| 4898 | " *\n" |
| 4899 | " *===-----------------------------------------------------------------------===\n" |
| 4900 | " */\n" |
| 4901 | "\n" |
| 4902 | "#ifndef __ARM_ACLE_H\n" |
| 4903 | "#define __ARM_ACLE_H\n" |
| 4904 | "\n" |
| 4905 | "#ifndef __ARM_ACLE\n" |
| 4906 | "#error \"ACLE intrinsics support not enabled.\"\n" |
| 4907 | "#endif\n" |
| 4908 | "\n" |
| 4909 | "#include <stdint.h>\n" |
| 4910 | "\n" |
| 4911 | "#if defined(__cplusplus)\n" |
| 4912 | "extern \"C\" {\n" |
| 4913 | "#endif\n" |
| 4914 | "\n" |
| 4915 | "/* 8 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */\n" |
| 4916 | "/* 8.3 Memory barriers */\n" |
| 4917 | "#if !defined(_MSC_VER)\n" |
| 4918 | "#define __dmb(i) __builtin_arm_dmb(i)\n" |
| 4919 | "#define __dsb(i) __builtin_arm_dsb(i)\n" |
| 4920 | "#define __isb(i) __builtin_arm_isb(i)\n" |
| 4921 | "#endif\n" |
| 4922 | "\n" |
| 4923 | "/* 8.4 Hints */\n" |
| 4924 | "\n" |
| 4925 | "#if !defined(_MSC_VER)\n" |
| 4926 | "static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfi(void) {\n" |
| 4927 | " __builtin_arm_wfi();\n" |
| 4928 | "}\n" |
| 4929 | "\n" |
| 4930 | "static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfe(void) {\n" |
| 4931 | " __builtin_arm_wfe();\n" |
| 4932 | "}\n" |
| 4933 | "\n" |
| 4934 | "static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sev(void) {\n" |
| 4935 | " __builtin_arm_sev();\n" |
| 4936 | "}\n" |
| 4937 | "\n" |
| 4938 | "static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sevl(void) {\n" |
| 4939 | " __builtin_arm_sevl();\n" |
| 4940 | "}\n" |
| 4941 | "\n" |
| 4942 | "static __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(void) {\n" |
| 4943 | " __builtin_arm_yield();\n" |
| 4944 | "}\n" |
| 4945 | "#endif\n" |
| 4946 | "\n" |
| 4947 | "#if __ARM_32BIT_STATE\n" |
| 4948 | "#define __dbg(t) __builtin_arm_dbg(t)\n" |
| 4949 | "#endif\n" |
| 4950 | "\n" |
| 4951 | "/* 8.5 Swap */\n" |
| 4952 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 4953 | "__swp(uint32_t __x, volatile uint32_t *__p) {\n" |
| 4954 | " uint32_t v;\n" |
| 4955 | " do\n" |
| 4956 | " v = __builtin_arm_ldrex(__p);\n" |
| 4957 | " while (__builtin_arm_strex(__x, __p));\n" |
| 4958 | " return v;\n" |
| 4959 | "}\n" |
| 4960 | "\n" |
| 4961 | "/* 8.6 Memory prefetch intrinsics */\n" |
| 4962 | "/* 8.6.1 Data prefetch */\n" |
| 4963 | "#define __pld(addr) __pldx(0, 0, 0, addr)\n" |
| 4964 | "\n" |
| 4965 | "#if __ARM_32BIT_STATE\n" |
| 4966 | "#define __pldx(access_kind, cache_level, retention_policy, addr) \\\n" |
| 4967 | " __builtin_arm_prefetch(addr, access_kind, 1)\n" |
| 4968 | "#else\n" |
| 4969 | "#define __pldx(access_kind, cache_level, retention_policy, addr) \\\n" |
| 4970 | " __builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1)\n" |
| 4971 | "#endif\n" |
| 4972 | "\n" |
| 4973 | "/* 8.6.2 Instruction prefetch */\n" |
| 4974 | "#define __pli(addr) __plix(0, 0, addr)\n" |
| 4975 | "\n" |
| 4976 | "#if __ARM_32BIT_STATE\n" |
| 4977 | "#define __plix(cache_level, retention_policy, addr) \\\n" |
| 4978 | " __builtin_arm_prefetch(addr, 0, 0)\n" |
| 4979 | "#else\n" |
| 4980 | "#define __plix(cache_level, retention_policy, addr) \\\n" |
| 4981 | " __builtin_arm_prefetch(addr, 0, cache_level, retention_policy, 0)\n" |
| 4982 | "#endif\n" |
| 4983 | "\n" |
| 4984 | "/* 8.7 NOP */\n" |
| 4985 | "static __inline__ void __attribute__((__always_inline__, __nodebug__)) __nop(void) {\n" |
| 4986 | " __builtin_arm_nop();\n" |
| 4987 | "}\n" |
| 4988 | "\n" |
| 4989 | "/* 9 DATA-PROCESSING INTRINSICS */\n" |
| 4990 | "/* 9.2 Miscellaneous data-processing intrinsics */\n" |
| 4991 | "/* ROR */\n" |
| 4992 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 4993 | "__ror(uint32_t __x, uint32_t __y) {\n" |
| 4994 | " __y %= 32;\n" |
| 4995 | " if (__y == 0)\n" |
| 4996 | " return __x;\n" |
| 4997 | " return (__x >> __y) | (__x << (32 - __y));\n" |
| 4998 | "}\n" |
| 4999 | "\n" |
| 5000 | "static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5001 | "__rorll(uint64_t __x, uint32_t __y) {\n" |
| 5002 | " __y %= 64;\n" |
| 5003 | " if (__y == 0)\n" |
| 5004 | " return __x;\n" |
| 5005 | " return (__x >> __y) | (__x << (64 - __y));\n" |
| 5006 | "}\n" |
| 5007 | "\n" |
| 5008 | "static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))\n" |
| 5009 | "__rorl(unsigned long __x, uint32_t __y) {\n" |
| 5010 | "#if __SIZEOF_LONG__ == 4\n" |
| 5011 | " return __ror(__x, __y);\n" |
| 5012 | "#else\n" |
| 5013 | " return __rorll(__x, __y);\n" |
| 5014 | "#endif\n" |
| 5015 | "}\n" |
| 5016 | "\n" |
| 5017 | "\n" |
| 5018 | "/* CLZ */\n" |
| 5019 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5020 | "__clz(uint32_t __t) {\n" |
| 5021 | " return __builtin_clz(__t);\n" |
| 5022 | "}\n" |
| 5023 | "\n" |
| 5024 | "static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))\n" |
| 5025 | "__clzl(unsigned long __t) {\n" |
| 5026 | " return __builtin_clzl(__t);\n" |
| 5027 | "}\n" |
| 5028 | "\n" |
| 5029 | "static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5030 | "__clzll(uint64_t __t) {\n" |
| 5031 | " return __builtin_clzll(__t);\n" |
| 5032 | "}\n" |
| 5033 | "\n" |
| 5034 | "/* REV */\n" |
| 5035 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5036 | "__rev(uint32_t __t) {\n" |
| 5037 | " return __builtin_bswap32(__t);\n" |
| 5038 | "}\n" |
| 5039 | "\n" |
| 5040 | "static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))\n" |
| 5041 | "__revl(unsigned long __t) {\n" |
| 5042 | "#if __SIZEOF_LONG__ == 4\n" |
| 5043 | " return __builtin_bswap32(__t);\n" |
| 5044 | "#else\n" |
| 5045 | " return __builtin_bswap64(__t);\n" |
| 5046 | "#endif\n" |
| 5047 | "}\n" |
| 5048 | "\n" |
| 5049 | "static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5050 | "__revll(uint64_t __t) {\n" |
| 5051 | " return __builtin_bswap64(__t);\n" |
| 5052 | "}\n" |
| 5053 | "\n" |
| 5054 | "/* REV16 */\n" |
| 5055 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5056 | "__rev16(uint32_t __t) {\n" |
| 5057 | " return __ror(__rev(__t), 16);\n" |
| 5058 | "}\n" |
| 5059 | "\n" |
| 5060 | "static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5061 | "__rev16ll(uint64_t __t) {\n" |
| 5062 | " return (((uint64_t)__rev16(__t >> 32)) << 32) | __rev16(__t);\n" |
| 5063 | "}\n" |
| 5064 | "\n" |
| 5065 | "static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))\n" |
| 5066 | "__rev16l(unsigned long __t) {\n" |
| 5067 | "#if __SIZEOF_LONG__ == 4\n" |
| 5068 | " return __rev16(__t);\n" |
| 5069 | "#else\n" |
| 5070 | " return __rev16ll(__t);\n" |
| 5071 | "#endif\n" |
| 5072 | "}\n" |
| 5073 | "\n" |
| 5074 | "/* REVSH */\n" |
| 5075 | "static __inline__ int16_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5076 | "__revsh(int16_t __t) {\n" |
| 5077 | " return __builtin_bswap16(__t);\n" |
| 5078 | "}\n" |
| 5079 | "\n" |
| 5080 | "/* RBIT */\n" |
| 5081 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5082 | "__rbit(uint32_t __t) {\n" |
| 5083 | " return __builtin_arm_rbit(__t);\n" |
| 5084 | "}\n" |
| 5085 | "\n" |
| 5086 | "static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5087 | "__rbitll(uint64_t __t) {\n" |
| 5088 | "#if __ARM_32BIT_STATE\n" |
| 5089 | " return (((uint64_t)__builtin_arm_rbit(__t)) << 32) |\n" |
| 5090 | " __builtin_arm_rbit(__t >> 32);\n" |
| 5091 | "#else\n" |
| 5092 | " return __builtin_arm_rbit64(__t);\n" |
| 5093 | "#endif\n" |
| 5094 | "}\n" |
| 5095 | "\n" |
| 5096 | "static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))\n" |
| 5097 | "__rbitl(unsigned long __t) {\n" |
| 5098 | "#if __SIZEOF_LONG__ == 4\n" |
| 5099 | " return __rbit(__t);\n" |
| 5100 | "#else\n" |
| 5101 | " return __rbitll(__t);\n" |
| 5102 | "#endif\n" |
| 5103 | "}\n" |
| 5104 | "\n" |
| 5105 | "/*\n" |
| 5106 | " * 9.3 16-bit multiplications\n" |
| 5107 | " */\n" |
| 5108 | "#if __ARM_FEATURE_DSP\n" |
| 5109 | "static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))\n" |
| 5110 | "__smulbb(int32_t __a, int32_t __b) {\n" |
| 5111 | " return __builtin_arm_smulbb(__a, __b);\n" |
| 5112 | "}\n" |
| 5113 | "static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))\n" |
| 5114 | "__smulbt(int32_t __a, int32_t __b) {\n" |
| 5115 | " return __builtin_arm_smulbt(__a, __b);\n" |
| 5116 | "}\n" |
| 5117 | "static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))\n" |
| 5118 | "__smultb(int32_t __a, int32_t __b) {\n" |
| 5119 | " return __builtin_arm_smultb(__a, __b);\n" |
| 5120 | "}\n" |
| 5121 | "static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))\n" |
| 5122 | "__smultt(int32_t __a, int32_t __b) {\n" |
| 5123 | " return __builtin_arm_smultt(__a, __b);\n" |
| 5124 | "}\n" |
| 5125 | "static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))\n" |
| 5126 | "__smulwb(int32_t __a, int32_t __b) {\n" |
| 5127 | " return __builtin_arm_smulwb(__a, __b);\n" |
| 5128 | "}\n" |
| 5129 | "static __inline__ int32_t __attribute__((__always_inline__,__nodebug__))\n" |
| 5130 | "__smulwt(int32_t __a, int32_t __b) {\n" |
| 5131 | " return __builtin_arm_smulwt(__a, __b);\n" |
| 5132 | "}\n" |
| 5133 | "#endif\n" |
| 5134 | "\n" |
| 5135 | "/*\n" |
| 5136 | " * 9.4 Saturating intrinsics\n" |
| 5137 | " *\n" |
| 5138 | " * FIXME: Change guard to their corrosponding __ARM_FEATURE flag when Q flag\n" |
| 5139 | " * intrinsics are implemented and the flag is enabled.\n" |
| 5140 | " */\n" |
| 5141 | "/* 9.4.1 Width-specified saturation intrinsics */\n" |
| 5142 | "#if __ARM_FEATURE_SAT\n" |
| 5143 | "#define __ssat(x, y) __builtin_arm_ssat(x, y)\n" |
| 5144 | "#define __usat(x, y) __builtin_arm_usat(x, y)\n" |
| 5145 | "#endif\n" |
| 5146 | "\n" |
| 5147 | "/* 9.4.2 Saturating addition and subtraction intrinsics */\n" |
| 5148 | "#if __ARM_FEATURE_DSP\n" |
| 5149 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5150 | "__qadd(int32_t __t, int32_t __v) {\n" |
| 5151 | " return __builtin_arm_qadd(__t, __v);\n" |
| 5152 | "}\n" |
| 5153 | "\n" |
| 5154 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5155 | "__qsub(int32_t __t, int32_t __v) {\n" |
| 5156 | " return __builtin_arm_qsub(__t, __v);\n" |
| 5157 | "}\n" |
| 5158 | "\n" |
| 5159 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5160 | "__qdbl(int32_t __t) {\n" |
| 5161 | " return __builtin_arm_qadd(__t, __t);\n" |
| 5162 | "}\n" |
| 5163 | "#endif\n" |
| 5164 | "\n" |
| 5165 | "/* 9.4.3 Accumultating multiplications */\n" |
| 5166 | "#if __ARM_FEATURE_DSP\n" |
| 5167 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5168 | "__smlabb(int32_t __a, int32_t __b, int32_t __c) {\n" |
| 5169 | " return __builtin_arm_smlabb(__a, __b, __c);\n" |
| 5170 | "}\n" |
| 5171 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5172 | "__smlabt(int32_t __a, int32_t __b, int32_t __c) {\n" |
| 5173 | " return __builtin_arm_smlabt(__a, __b, __c);\n" |
| 5174 | "}\n" |
| 5175 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5176 | "__smlatb(int32_t __a, int32_t __b, int32_t __c) {\n" |
| 5177 | " return __builtin_arm_smlatb(__a, __b, __c);\n" |
| 5178 | "}\n" |
| 5179 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5180 | "__smlatt(int32_t __a, int32_t __b, int32_t __c) {\n" |
| 5181 | " return __builtin_arm_smlatt(__a, __b, __c);\n" |
| 5182 | "}\n" |
| 5183 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5184 | "__smlawb(int32_t __a, int32_t __b, int32_t __c) {\n" |
| 5185 | " return __builtin_arm_smlawb(__a, __b, __c);\n" |
| 5186 | "}\n" |
| 5187 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5188 | "__smlawt(int32_t __a, int32_t __b, int32_t __c) {\n" |
| 5189 | " return __builtin_arm_smlawt(__a, __b, __c);\n" |
| 5190 | "}\n" |
| 5191 | "#endif\n" |
| 5192 | "\n" |
| 5193 | "\n" |
| 5194 | "/* 9.5.4 Parallel 16-bit saturation */\n" |
| 5195 | "#if __ARM_FEATURE_SIMD32\n" |
| 5196 | "#define __ssat16(x, y) __builtin_arm_ssat16(x, y)\n" |
| 5197 | "#define __usat16(x, y) __builtin_arm_usat16(x, y)\n" |
| 5198 | "#endif\n" |
| 5199 | "\n" |
| 5200 | "/* 9.5.5 Packing and unpacking */\n" |
| 5201 | "#if __ARM_FEATURE_SIMD32\n" |
| 5202 | "typedef int32_t int8x4_t;\n" |
| 5203 | "typedef int32_t int16x2_t;\n" |
| 5204 | "typedef uint32_t uint8x4_t;\n" |
| 5205 | "typedef uint32_t uint16x2_t;\n" |
| 5206 | "\n" |
| 5207 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5208 | "__sxtab16(int16x2_t __a, int8x4_t __b) {\n" |
| 5209 | " return __builtin_arm_sxtab16(__a, __b);\n" |
| 5210 | "}\n" |
| 5211 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5212 | "__sxtb16(int8x4_t __a) {\n" |
| 5213 | " return __builtin_arm_sxtb16(__a);\n" |
| 5214 | "}\n" |
| 5215 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5216 | "__uxtab16(int16x2_t __a, int8x4_t __b) {\n" |
| 5217 | " return __builtin_arm_uxtab16(__a, __b);\n" |
| 5218 | "}\n" |
| 5219 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5220 | "__uxtb16(int8x4_t __a) {\n" |
| 5221 | " return __builtin_arm_uxtb16(__a);\n" |
| 5222 | "}\n" |
| 5223 | "#endif\n" |
| 5224 | "\n" |
| 5225 | "/* 9.5.6 Parallel selection */\n" |
| 5226 | "#if __ARM_FEATURE_SIMD32\n" |
| 5227 | "static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5228 | "__sel(uint8x4_t __a, uint8x4_t __b) {\n" |
| 5229 | " return __builtin_arm_sel(__a, __b);\n" |
| 5230 | "}\n" |
| 5231 | "#endif\n" |
| 5232 | "\n" |
| 5233 | "/* 9.5.7 Parallel 8-bit addition and subtraction */\n" |
| 5234 | "#if __ARM_FEATURE_SIMD32\n" |
| 5235 | "static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5236 | "__qadd8(int8x4_t __a, int8x4_t __b) {\n" |
| 5237 | " return __builtin_arm_qadd8(__a, __b);\n" |
| 5238 | "}\n" |
| 5239 | "static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5240 | "__qsub8(int8x4_t __a, int8x4_t __b) {\n" |
| 5241 | " return __builtin_arm_qsub8(__a, __b);\n" |
| 5242 | "}\n" |
| 5243 | "static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5244 | "__sadd8(int8x4_t __a, int8x4_t __b) {\n" |
| 5245 | " return __builtin_arm_sadd8(__a, __b);\n" |
| 5246 | "}\n" |
| 5247 | "static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5248 | "__shadd8(int8x4_t __a, int8x4_t __b) {\n" |
| 5249 | " return __builtin_arm_shadd8(__a, __b);\n" |
| 5250 | "}\n" |
| 5251 | "static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5252 | "__shsub8(int8x4_t __a, int8x4_t __b) {\n" |
| 5253 | " return __builtin_arm_shsub8(__a, __b);\n" |
| 5254 | "}\n" |
| 5255 | "static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5256 | "__ssub8(int8x4_t __a, int8x4_t __b) {\n" |
| 5257 | " return __builtin_arm_ssub8(__a, __b);\n" |
| 5258 | "}\n" |
| 5259 | "static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5260 | "__uadd8(uint8x4_t __a, uint8x4_t __b) {\n" |
| 5261 | " return __builtin_arm_uadd8(__a, __b);\n" |
| 5262 | "}\n" |
| 5263 | "static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5264 | "__uhadd8(uint8x4_t __a, uint8x4_t __b) {\n" |
| 5265 | " return __builtin_arm_uhadd8(__a, __b);\n" |
| 5266 | "}\n" |
| 5267 | "static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5268 | "__uhsub8(uint8x4_t __a, uint8x4_t __b) {\n" |
| 5269 | " return __builtin_arm_uhsub8(__a, __b);\n" |
| 5270 | "}\n" |
| 5271 | "static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5272 | "__uqadd8(uint8x4_t __a, uint8x4_t __b) {\n" |
| 5273 | " return __builtin_arm_uqadd8(__a, __b);\n" |
| 5274 | "}\n" |
| 5275 | "static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5276 | "__uqsub8(uint8x4_t __a, uint8x4_t __b) {\n" |
| 5277 | " return __builtin_arm_uqsub8(__a, __b);\n" |
| 5278 | "}\n" |
| 5279 | "static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5280 | "__usub8(uint8x4_t __a, uint8x4_t __b) {\n" |
| 5281 | " return __builtin_arm_usub8(__a, __b);\n" |
| 5282 | "}\n" |
| 5283 | "#endif\n" |
| 5284 | "\n" |
| 5285 | "/* 9.5.8 Sum of 8-bit absolute differences */\n" |
| 5286 | "#if __ARM_FEATURE_SIMD32\n" |
| 5287 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5288 | "__usad8(uint8x4_t __a, uint8x4_t __b) {\n" |
| 5289 | " return __builtin_arm_usad8(__a, __b);\n" |
| 5290 | "}\n" |
| 5291 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5292 | "__usada8(uint8x4_t __a, uint8x4_t __b, uint32_t __c) {\n" |
| 5293 | " return __builtin_arm_usada8(__a, __b, __c);\n" |
| 5294 | "}\n" |
| 5295 | "#endif\n" |
| 5296 | "\n" |
| 5297 | "/* 9.5.9 Parallel 16-bit addition and subtraction */\n" |
| 5298 | "#if __ARM_FEATURE_SIMD32\n" |
| 5299 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5300 | "__qadd16(int16x2_t __a, int16x2_t __b) {\n" |
| 5301 | " return __builtin_arm_qadd16(__a, __b);\n" |
| 5302 | "}\n" |
| 5303 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5304 | "__qasx(int16x2_t __a, int16x2_t __b) {\n" |
| 5305 | " return __builtin_arm_qasx(__a, __b);\n" |
| 5306 | "}\n" |
| 5307 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5308 | "__qsax(int16x2_t __a, int16x2_t __b) {\n" |
| 5309 | " return __builtin_arm_qsax(__a, __b);\n" |
| 5310 | "}\n" |
| 5311 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5312 | "__qsub16(int16x2_t __a, int16x2_t __b) {\n" |
| 5313 | " return __builtin_arm_qsub16(__a, __b);\n" |
| 5314 | "}\n" |
| 5315 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5316 | "__sadd16(int16x2_t __a, int16x2_t __b) {\n" |
| 5317 | " return __builtin_arm_sadd16(__a, __b);\n" |
| 5318 | "}\n" |
| 5319 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5320 | "__sasx(int16x2_t __a, int16x2_t __b) {\n" |
| 5321 | " return __builtin_arm_sasx(__a, __b);\n" |
| 5322 | "}\n" |
| 5323 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5324 | "__shadd16(int16x2_t __a, int16x2_t __b) {\n" |
| 5325 | " return __builtin_arm_shadd16(__a, __b);\n" |
| 5326 | "}\n" |
| 5327 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5328 | "__shasx(int16x2_t __a, int16x2_t __b) {\n" |
| 5329 | " return __builtin_arm_shasx(__a, __b);\n" |
| 5330 | "}\n" |
| 5331 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5332 | "__shsax(int16x2_t __a, int16x2_t __b) {\n" |
| 5333 | " return __builtin_arm_shsax(__a, __b);\n" |
| 5334 | "}\n" |
| 5335 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5336 | "__shsub16(int16x2_t __a, int16x2_t __b) {\n" |
| 5337 | " return __builtin_arm_shsub16(__a, __b);\n" |
| 5338 | "}\n" |
| 5339 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5340 | "__ssax(int16x2_t __a, int16x2_t __b) {\n" |
| 5341 | " return __builtin_arm_ssax(__a, __b);\n" |
| 5342 | "}\n" |
| 5343 | "static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5344 | "__ssub16(int16x2_t __a, int16x2_t __b) {\n" |
| 5345 | " return __builtin_arm_ssub16(__a, __b);\n" |
| 5346 | "}\n" |
| 5347 | "static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5348 | "__uadd16(uint16x2_t __a, uint16x2_t __b) {\n" |
| 5349 | " return __builtin_arm_uadd16(__a, __b);\n" |
| 5350 | "}\n" |
| 5351 | "static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5352 | "__uasx(uint16x2_t __a, uint16x2_t __b) {\n" |
| 5353 | " return __builtin_arm_uasx(__a, __b);\n" |
| 5354 | "}\n" |
| 5355 | "static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5356 | "__uhadd16(uint16x2_t __a, uint16x2_t __b) {\n" |
| 5357 | " return __builtin_arm_uhadd16(__a, __b);\n" |
| 5358 | "}\n" |
| 5359 | "static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5360 | "__uhasx(uint16x2_t __a, uint16x2_t __b) {\n" |
| 5361 | " return __builtin_arm_uhasx(__a, __b);\n" |
| 5362 | "}\n" |
| 5363 | "static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5364 | "__uhsax(uint16x2_t __a, uint16x2_t __b) {\n" |
| 5365 | " return __builtin_arm_uhsax(__a, __b);\n" |
| 5366 | "}\n" |
| 5367 | "static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5368 | "__uhsub16(uint16x2_t __a, uint16x2_t __b) {\n" |
| 5369 | " return __builtin_arm_uhsub16(__a, __b);\n" |
| 5370 | "}\n" |
| 5371 | "static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5372 | "__uqadd16(uint16x2_t __a, uint16x2_t __b) {\n" |
| 5373 | " return __builtin_arm_uqadd16(__a, __b);\n" |
| 5374 | "}\n" |
| 5375 | "static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5376 | "__uqasx(uint16x2_t __a, uint16x2_t __b) {\n" |
| 5377 | " return __builtin_arm_uqasx(__a, __b);\n" |
| 5378 | "}\n" |
| 5379 | "static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5380 | "__uqsax(uint16x2_t __a, uint16x2_t __b) {\n" |
| 5381 | " return __builtin_arm_uqsax(__a, __b);\n" |
| 5382 | "}\n" |
| 5383 | "static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5384 | "__uqsub16(uint16x2_t __a, uint16x2_t __b) {\n" |
| 5385 | " return __builtin_arm_uqsub16(__a, __b);\n" |
| 5386 | "}\n" |
| 5387 | "static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5388 | "__usax(uint16x2_t __a, uint16x2_t __b) {\n" |
| 5389 | " return __builtin_arm_usax(__a, __b);\n" |
| 5390 | "}\n" |
| 5391 | "static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5392 | "__usub16(uint16x2_t __a, uint16x2_t __b) {\n" |
| 5393 | " return __builtin_arm_usub16(__a, __b);\n" |
| 5394 | "}\n" |
| 5395 | "#endif\n" |
| 5396 | "\n" |
| 5397 | "/* 9.5.10 Parallel 16-bit multiplications */\n" |
| 5398 | "#if __ARM_FEATURE_SIMD32\n" |
| 5399 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5400 | "__smlad(int16x2_t __a, int16x2_t __b, int32_t __c) {\n" |
| 5401 | " return __builtin_arm_smlad(__a, __b, __c);\n" |
| 5402 | "}\n" |
| 5403 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5404 | "__smladx(int16x2_t __a, int16x2_t __b, int32_t __c) {\n" |
| 5405 | " return __builtin_arm_smladx(__a, __b, __c);\n" |
| 5406 | "}\n" |
| 5407 | "static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5408 | "__smlald(int16x2_t __a, int16x2_t __b, int64_t __c) {\n" |
| 5409 | " return __builtin_arm_smlald(__a, __b, __c);\n" |
| 5410 | "}\n" |
| 5411 | "static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5412 | "__smlaldx(int16x2_t __a, int16x2_t __b, int64_t __c) {\n" |
| 5413 | " return __builtin_arm_smlaldx(__a, __b, __c);\n" |
| 5414 | "}\n" |
| 5415 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5416 | "__smlsd(int16x2_t __a, int16x2_t __b, int32_t __c) {\n" |
| 5417 | " return __builtin_arm_smlsd(__a, __b, __c);\n" |
| 5418 | "}\n" |
| 5419 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5420 | "__smlsdx(int16x2_t __a, int16x2_t __b, int32_t __c) {\n" |
| 5421 | " return __builtin_arm_smlsdx(__a, __b, __c);\n" |
| 5422 | "}\n" |
| 5423 | "static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5424 | "__smlsld(int16x2_t __a, int16x2_t __b, int64_t __c) {\n" |
| 5425 | " return __builtin_arm_smlsld(__a, __b, __c);\n" |
| 5426 | "}\n" |
| 5427 | "static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5428 | "__smlsldx(int16x2_t __a, int16x2_t __b, int64_t __c) {\n" |
| 5429 | " return __builtin_arm_smlsldx(__a, __b, __c);\n" |
| 5430 | "}\n" |
| 5431 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5432 | "__smuad(int16x2_t __a, int16x2_t __b) {\n" |
| 5433 | " return __builtin_arm_smuad(__a, __b);\n" |
| 5434 | "}\n" |
| 5435 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5436 | "__smuadx(int16x2_t __a, int16x2_t __b) {\n" |
| 5437 | " return __builtin_arm_smuadx(__a, __b);\n" |
| 5438 | "}\n" |
| 5439 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5440 | "__smusd(int16x2_t __a, int16x2_t __b) {\n" |
| 5441 | " return __builtin_arm_smusd(__a, __b);\n" |
| 5442 | "}\n" |
| 5443 | "static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5444 | "__smusdx(int16x2_t __a, int16x2_t __b) {\n" |
| 5445 | " return __builtin_arm_smusdx(__a, __b);\n" |
| 5446 | "}\n" |
| 5447 | "#endif\n" |
| 5448 | "\n" |
| 5449 | "/* 9.7 CRC32 intrinsics */\n" |
| 5450 | "#if __ARM_FEATURE_CRC32\n" |
| 5451 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5452 | "__crc32b(uint32_t __a, uint8_t __b) {\n" |
| 5453 | " return __builtin_arm_crc32b(__a, __b);\n" |
| 5454 | "}\n" |
| 5455 | "\n" |
| 5456 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5457 | "__crc32h(uint32_t __a, uint16_t __b) {\n" |
| 5458 | " return __builtin_arm_crc32h(__a, __b);\n" |
| 5459 | "}\n" |
| 5460 | "\n" |
| 5461 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5462 | "__crc32w(uint32_t __a, uint32_t __b) {\n" |
| 5463 | " return __builtin_arm_crc32w(__a, __b);\n" |
| 5464 | "}\n" |
| 5465 | "\n" |
| 5466 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5467 | "__crc32d(uint32_t __a, uint64_t __b) {\n" |
| 5468 | " return __builtin_arm_crc32d(__a, __b);\n" |
| 5469 | "}\n" |
| 5470 | "\n" |
| 5471 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5472 | "__crc32cb(uint32_t __a, uint8_t __b) {\n" |
| 5473 | " return __builtin_arm_crc32cb(__a, __b);\n" |
| 5474 | "}\n" |
| 5475 | "\n" |
| 5476 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5477 | "__crc32ch(uint32_t __a, uint16_t __b) {\n" |
| 5478 | " return __builtin_arm_crc32ch(__a, __b);\n" |
| 5479 | "}\n" |
| 5480 | "\n" |
| 5481 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5482 | "__crc32cw(uint32_t __a, uint32_t __b) {\n" |
| 5483 | " return __builtin_arm_crc32cw(__a, __b);\n" |
| 5484 | "}\n" |
| 5485 | "\n" |
| 5486 | "static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))\n" |
| 5487 | "__crc32cd(uint32_t __a, uint64_t __b) {\n" |
| 5488 | " return __builtin_arm_crc32cd(__a, __b);\n" |
| 5489 | "}\n" |
| 5490 | "#endif\n" |
| 5491 | "\n" |
| 5492 | "/* 10.1 Special register intrinsics */\n" |
| 5493 | "#define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg)\n" |
| 5494 | "#define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg)\n" |
| 5495 | "#define __arm_rsrp(sysreg) __builtin_arm_rsrp(sysreg)\n" |
| 5496 | "#define __arm_wsr(sysreg, v) __builtin_arm_wsr(sysreg, v)\n" |
| 5497 | "#define __arm_wsr64(sysreg, v) __builtin_arm_wsr64(sysreg, v)\n" |
| 5498 | "#define __arm_wsrp(sysreg, v) __builtin_arm_wsrp(sysreg, v)\n" |
| 5499 | "\n" |
| 5500 | "#if defined(__cplusplus)\n" |
| 5501 | "}\n" |
| 5502 | "#endif\n" |
| 5503 | "\n" |
| 5504 | "#endif /* __ARM_ACLE_H */\n" |
| 5505 | "" } , |
| 5506 | { "/builtins/arm_fp16.h" , "/*===---- arm_fp16.h - ARM FP16 intrinsics ---------------------------------===\n" |
| 5507 | " *\n" |
| 5508 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 5509 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 5510 | " * in the Software without restriction, including without limitation the rights\n" |
| 5511 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 5512 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 5513 | " * furnished to do so, subject to the following conditions:\n" |
| 5514 | " *\n" |
| 5515 | " * The above copyright notice and this permission notice shall be included in\n" |
| 5516 | " * all copies or substantial portions of the Software.\n" |
| 5517 | " *\n" |
| 5518 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 5519 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 5520 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 5521 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 5522 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 5523 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 5524 | " * THE SOFTWARE.\n" |
| 5525 | " *\n" |
| 5526 | " *===-----------------------------------------------------------------------===\n" |
| 5527 | " */\n" |
| 5528 | "\n" |
| 5529 | "#ifndef __ARM_FP16_H\n" |
| 5530 | "#define __ARM_FP16_H\n" |
| 5531 | "\n" |
| 5532 | "#include <stdint.h>\n" |
| 5533 | "\n" |
| 5534 | "typedef __fp16 float16_t;\n" |
| 5535 | "#define __ai static inline __attribute__((__always_inline__, __nodebug__))\n" |
| 5536 | "\n" |
| 5537 | "#if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__)\n" |
| 5538 | "#ifdef __LITTLE_ENDIAN__\n" |
| 5539 | "#define vabdh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5540 | " float16_t __s0 = __p0; \\\n" |
| 5541 | " float16_t __s1 = __p1; \\\n" |
| 5542 | " float16_t __ret; \\\n" |
| 5543 | " __ret = (float16_t) __builtin_neon_vabdh_f16(__s0, __s1); \\\n" |
| 5544 | " __ret; \\\n" |
| 5545 | "})\n" |
| 5546 | "#else\n" |
| 5547 | "#define vabdh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5548 | " float16_t __s0 = __p0; \\\n" |
| 5549 | " float16_t __s1 = __p1; \\\n" |
| 5550 | " float16_t __ret; \\\n" |
| 5551 | " __ret = (float16_t) __builtin_neon_vabdh_f16(__s0, __s1); \\\n" |
| 5552 | " __ret; \\\n" |
| 5553 | "})\n" |
| 5554 | "#endif\n" |
| 5555 | "\n" |
| 5556 | "#ifdef __LITTLE_ENDIAN__\n" |
| 5557 | "#define vabsh_f16(__p0) __extension__ ({ \\\n" |
| 5558 | " float16_t __s0 = __p0; \\\n" |
| 5559 | " float16_t __ret; \\\n" |
| 5560 | " __ret = (float16_t) __builtin_neon_vabsh_f16(__s0); \\\n" |
| 5561 | " __ret; \\\n" |
| 5562 | "})\n" |
| 5563 | "#else\n" |
| 5564 | "#define vabsh_f16(__p0) __extension__ ({ \\\n" |
| 5565 | " float16_t __s0 = __p0; \\\n" |
| 5566 | " float16_t __ret; \\\n" |
| 5567 | " __ret = (float16_t) __builtin_neon_vabsh_f16(__s0); \\\n" |
| 5568 | " __ret; \\\n" |
| 5569 | "})\n" |
| 5570 | "#endif\n" |
| 5571 | "\n" |
| 5572 | "#ifdef __LITTLE_ENDIAN__\n" |
| 5573 | "#define vaddh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5574 | " float16_t __s0 = __p0; \\\n" |
| 5575 | " float16_t __s1 = __p1; \\\n" |
| 5576 | " float16_t __ret; \\\n" |
| 5577 | " __ret = (float16_t) __builtin_neon_vaddh_f16(__s0, __s1); \\\n" |
| 5578 | " __ret; \\\n" |
| 5579 | "})\n" |
| 5580 | "#else\n" |
| 5581 | "#define vaddh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5582 | " float16_t __s0 = __p0; \\\n" |
| 5583 | " float16_t __s1 = __p1; \\\n" |
| 5584 | " float16_t __ret; \\\n" |
| 5585 | " __ret = (float16_t) __builtin_neon_vaddh_f16(__s0, __s1); \\\n" |
| 5586 | " __ret; \\\n" |
| 5587 | "})\n" |
| 5588 | "#endif\n" |
| 5589 | "\n" |
| 5590 | "#ifdef __LITTLE_ENDIAN__\n" |
| 5591 | "#define vcageh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5592 | " float16_t __s0 = __p0; \\\n" |
| 5593 | " float16_t __s1 = __p1; \\\n" |
| 5594 | " uint16_t __ret; \\\n" |
| 5595 | " __ret = (uint16_t) __builtin_neon_vcageh_f16(__s0, __s1); \\\n" |
| 5596 | " __ret; \\\n" |
| 5597 | "})\n" |
| 5598 | "#else\n" |
| 5599 | "#define vcageh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5600 | " float16_t __s0 = __p0; \\\n" |
| 5601 | " float16_t __s1 = __p1; \\\n" |
| 5602 | " uint16_t __ret; \\\n" |
| 5603 | " __ret = (uint16_t) __builtin_neon_vcageh_f16(__s0, __s1); \\\n" |
| 5604 | " __ret; \\\n" |
| 5605 | "})\n" |
| 5606 | "#endif\n" |
| 5607 | "\n" |
| 5608 | "#ifdef __LITTLE_ENDIAN__\n" |
| 5609 | "#define vcagth_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5610 | " float16_t __s0 = __p0; \\\n" |
| 5611 | " float16_t __s1 = __p1; \\\n" |
| 5612 | " uint16_t __ret; \\\n" |
| 5613 | " __ret = (uint16_t) __builtin_neon_vcagth_f16(__s0, __s1); \\\n" |
| 5614 | " __ret; \\\n" |
| 5615 | "})\n" |
| 5616 | "#else\n" |
| 5617 | "#define vcagth_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5618 | " float16_t __s0 = __p0; \\\n" |
| 5619 | " float16_t __s1 = __p1; \\\n" |
| 5620 | " uint16_t __ret; \\\n" |
| 5621 | " __ret = (uint16_t) __builtin_neon_vcagth_f16(__s0, __s1); \\\n" |
| 5622 | " __ret; \\\n" |
| 5623 | "})\n" |
| 5624 | "#endif\n" |
| 5625 | "\n" |
| 5626 | "#ifdef __LITTLE_ENDIAN__\n" |
| 5627 | "#define vcaleh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5628 | " float16_t __s0 = __p0; \\\n" |
| 5629 | " float16_t __s1 = __p1; \\\n" |
| 5630 | " uint16_t __ret; \\\n" |
| 5631 | " __ret = (uint16_t) __builtin_neon_vcaleh_f16(__s0, __s1); \\\n" |
| 5632 | " __ret; \\\n" |
| 5633 | "})\n" |
| 5634 | "#else\n" |
| 5635 | "#define vcaleh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5636 | " float16_t __s0 = __p0; \\\n" |
| 5637 | " float16_t __s1 = __p1; \\\n" |
| 5638 | " uint16_t __ret; \\\n" |
| 5639 | " __ret = (uint16_t) __builtin_neon_vcaleh_f16(__s0, __s1); \\\n" |
| 5640 | " __ret; \\\n" |
| 5641 | "})\n" |
| 5642 | "#endif\n" |
| 5643 | "\n" |
| 5644 | "#ifdef __LITTLE_ENDIAN__\n" |
| 5645 | "#define vcalth_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5646 | " float16_t __s0 = __p0; \\\n" |
| 5647 | " float16_t __s1 = __p1; \\\n" |
| 5648 | " uint16_t __ret; \\\n" |
| 5649 | " __ret = (uint16_t) __builtin_neon_vcalth_f16(__s0, __s1); \\\n" |
| 5650 | " __ret; \\\n" |
| 5651 | "})\n" |
| 5652 | "#else\n" |
| 5653 | "#define vcalth_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5654 | " float16_t __s0 = __p0; \\\n" |
| 5655 | " float16_t __s1 = __p1; \\\n" |
| 5656 | " uint16_t __ret; \\\n" |
| 5657 | " __ret = (uint16_t) __builtin_neon_vcalth_f16(__s0, __s1); \\\n" |
| 5658 | " __ret; \\\n" |
| 5659 | "})\n" |
| 5660 | "#endif\n" |
| 5661 | "\n" |
| 5662 | "#ifdef __LITTLE_ENDIAN__\n" |
| 5663 | "#define vceqh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5664 | " float16_t __s0 = __p0; \\\n" |
| 5665 | " float16_t __s1 = __p1; \\\n" |
| 5666 | " uint16_t __ret; \\\n" |
| 5667 | " __ret = (uint16_t) __builtin_neon_vceqh_f16(__s0, __s1); \\\n" |
| 5668 | " __ret; \\\n" |
| 5669 | "})\n" |
| 5670 | "#else\n" |
| 5671 | "#define vceqh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5672 | " float16_t __s0 = __p0; \\\n" |
| 5673 | " float16_t __s1 = __p1; \\\n" |
| 5674 | " uint16_t __ret; \\\n" |
| 5675 | " __ret = (uint16_t) __builtin_neon_vceqh_f16(__s0, __s1); \\\n" |
| 5676 | " __ret; \\\n" |
| 5677 | "})\n" |
| 5678 | "#endif\n" |
| 5679 | "\n" |
| 5680 | "#ifdef __LITTLE_ENDIAN__\n" |
| 5681 | "#define vceqzh_f16(__p0) __extension__ ({ \\\n" |
| 5682 | " float16_t __s0 = __p0; \\\n" |
| 5683 | " uint16_t __ret; \\\n" |
| 5684 | " __ret = (uint16_t) __builtin_neon_vceqzh_f16(__s0); \\\n" |
| 5685 | " __ret; \\\n" |
| 5686 | "})\n" |
| 5687 | "#else\n" |
| 5688 | "#define vceqzh_f16(__p0) __extension__ ({ \\\n" |
| 5689 | " float16_t __s0 = __p0; \\\n" |
| 5690 | " uint16_t __ret; \\\n" |
| 5691 | " __ret = (uint16_t) __builtin_neon_vceqzh_f16(__s0); \\\n" |
| 5692 | " __ret; \\\n" |
| 5693 | "})\n" |
| 5694 | "#endif\n" |
| 5695 | "\n" |
| 5696 | "#ifdef __LITTLE_ENDIAN__\n" |
| 5697 | "#define vcgeh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5698 | " float16_t __s0 = __p0; \\\n" |
| 5699 | " float16_t __s1 = __p1; \\\n" |
| 5700 | " uint16_t __ret; \\\n" |
| 5701 | " __ret = (uint16_t) __builtin_neon_vcgeh_f16(__s0, __s1); \\\n" |
| 5702 | " __ret; \\\n" |
| 5703 | "})\n" |
| 5704 | "#else\n" |
| 5705 | "#define vcgeh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5706 | " float16_t __s0 = __p0; \\\n" |
| 5707 | " float16_t __s1 = __p1; \\\n" |
| 5708 | " uint16_t __ret; \\\n" |
| 5709 | " __ret = (uint16_t) __builtin_neon_vcgeh_f16(__s0, __s1); \\\n" |
| 5710 | " __ret; \\\n" |
| 5711 | "})\n" |
| 5712 | "#endif\n" |
| 5713 | "\n" |
| 5714 | "#ifdef __LITTLE_ENDIAN__\n" |
| 5715 | "#define vcgezh_f16(__p0) __extension__ ({ \\\n" |
| 5716 | " float16_t __s0 = __p0; \\\n" |
| 5717 | " uint16_t __ret; \\\n" |
| 5718 | " __ret = (uint16_t) __builtin_neon_vcgezh_f16(__s0); \\\n" |
| 5719 | " __ret; \\\n" |
| 5720 | "})\n" |
| 5721 | "#else\n" |
| 5722 | "#define vcgezh_f16(__p0) __extension__ ({ \\\n" |
| 5723 | " float16_t __s0 = __p0; \\\n" |
| 5724 | " uint16_t __ret; \\\n" |
| 5725 | " __ret = (uint16_t) __builtin_neon_vcgezh_f16(__s0); \\\n" |
| 5726 | " __ret; \\\n" |
| 5727 | "})\n" |
| 5728 | "#endif\n" |
| 5729 | "\n" |
| 5730 | "#ifdef __LITTLE_ENDIAN__\n" |
| 5731 | "#define vcgth_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5732 | " float16_t __s0 = __p0; \\\n" |
| 5733 | " float16_t __s1 = __p1; \\\n" |
| 5734 | " uint16_t __ret; \\\n" |
| 5735 | " __ret = (uint16_t) __builtin_neon_vcgth_f16(__s0, __s1); \\\n" |
| 5736 | " __ret; \\\n" |
| 5737 | "})\n" |
| 5738 | "#else\n" |
| 5739 | "#define vcgth_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5740 | " float16_t __s0 = __p0; \\\n" |
| 5741 | " float16_t __s1 = __p1; \\\n" |
| 5742 | " uint16_t __ret; \\\n" |
| 5743 | " __ret = (uint16_t) __builtin_neon_vcgth_f16(__s0, __s1); \\\n" |
| 5744 | " __ret; \\\n" |
| 5745 | "})\n" |
| 5746 | "#endif\n" |
| 5747 | "\n" |
| 5748 | "#ifdef __LITTLE_ENDIAN__\n" |
| 5749 | "#define vcgtzh_f16(__p0) __extension__ ({ \\\n" |
| 5750 | " float16_t __s0 = __p0; \\\n" |
| 5751 | " uint16_t __ret; \\\n" |
| 5752 | " __ret = (uint16_t) __builtin_neon_vcgtzh_f16(__s0); \\\n" |
| 5753 | " __ret; \\\n" |
| 5754 | "})\n" |
| 5755 | "#else\n" |
| 5756 | "#define vcgtzh_f16(__p0) __extension__ ({ \\\n" |
| 5757 | " float16_t __s0 = __p0; \\\n" |
| 5758 | " uint16_t __ret; \\\n" |
| 5759 | " __ret = (uint16_t) __builtin_neon_vcgtzh_f16(__s0); \\\n" |
| 5760 | " __ret; \\\n" |
| 5761 | "})\n" |
| 5762 | "#endif\n" |
| 5763 | "\n" |
| 5764 | "#ifdef __LITTLE_ENDIAN__\n" |
| 5765 | "#define vcleh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5766 | " float16_t __s0 = __p0; \\\n" |
| 5767 | " float16_t __s1 = __p1; \\\n" |
| 5768 | " uint16_t __ret; \\\n" |
| 5769 | " __ret = (uint16_t) __builtin_neon_vcleh_f16(__s0, __s1); \\\n" |
| 5770 | " __ret; \\\n" |
| 5771 | "})\n" |
| 5772 | "#else\n" |
| 5773 | "#define vcleh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5774 | " float16_t __s0 = __p0; \\\n" |
| 5775 | " float16_t __s1 = __p1; \\\n" |
| 5776 | " uint16_t __ret; \\\n" |
| 5777 | " __ret = (uint16_t) __builtin_neon_vcleh_f16(__s0, __s1); \\\n" |
| 5778 | " __ret; \\\n" |
| 5779 | "})\n" |
| 5780 | "#endif\n" |
| 5781 | "\n" |
| 5782 | "#ifdef __LITTLE_ENDIAN__\n" |
| 5783 | "#define vclezh_f16(__p0) __extension__ ({ \\\n" |
| 5784 | " float16_t __s0 = __p0; \\\n" |
| 5785 | " uint16_t __ret; \\\n" |
| 5786 | " __ret = (uint16_t) __builtin_neon_vclezh_f16(__s0); \\\n" |
| 5787 | " __ret; \\\n" |
| 5788 | "})\n" |
| 5789 | "#else\n" |
| 5790 | "#define vclezh_f16(__p0) __extension__ ({ \\\n" |
| 5791 | " float16_t __s0 = __p0; \\\n" |
| 5792 | " uint16_t __ret; \\\n" |
| 5793 | " __ret = (uint16_t) __builtin_neon_vclezh_f16(__s0); \\\n" |
| 5794 | " __ret; \\\n" |
| 5795 | "})\n" |
| 5796 | "#endif\n" |
| 5797 | "\n" |
| 5798 | "#ifdef __LITTLE_ENDIAN__\n" |
| 5799 | "#define vclth_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5800 | " float16_t __s0 = __p0; \\\n" |
| 5801 | " float16_t __s1 = __p1; \\\n" |
| 5802 | " uint16_t __ret; \\\n" |
| 5803 | " __ret = (uint16_t) __builtin_neon_vclth_f16(__s0, __s1); \\\n" |
| 5804 | " __ret; \\\n" |
| 5805 | "})\n" |
| 5806 | "#else\n" |
| 5807 | "#define vclth_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5808 | " float16_t __s0 = __p0; \\\n" |
| 5809 | " float16_t __s1 = __p1; \\\n" |
| 5810 | " uint16_t __ret; \\\n" |
| 5811 | " __ret = (uint16_t) __builtin_neon_vclth_f16(__s0, __s1); \\\n" |
| 5812 | " __ret; \\\n" |
| 5813 | "})\n" |
| 5814 | "#endif\n" |
| 5815 | "\n" |
| 5816 | "#ifdef __LITTLE_ENDIAN__\n" |
| 5817 | "#define vcltzh_f16(__p0) __extension__ ({ \\\n" |
| 5818 | " float16_t __s0 = __p0; \\\n" |
| 5819 | " uint16_t __ret; \\\n" |
| 5820 | " __ret = (uint16_t) __builtin_neon_vcltzh_f16(__s0); \\\n" |
| 5821 | " __ret; \\\n" |
| 5822 | "})\n" |
| 5823 | "#else\n" |
| 5824 | "#define vcltzh_f16(__p0) __extension__ ({ \\\n" |
| 5825 | " float16_t __s0 = __p0; \\\n" |
| 5826 | " uint16_t __ret; \\\n" |
| 5827 | " __ret = (uint16_t) __builtin_neon_vcltzh_f16(__s0); \\\n" |
| 5828 | " __ret; \\\n" |
| 5829 | "})\n" |
| 5830 | "#endif\n" |
| 5831 | "\n" |
| 5832 | "#ifdef __LITTLE_ENDIAN__\n" |
| 5833 | "#define vcvth_n_s16_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5834 | " float16_t __s0 = __p0; \\\n" |
| 5835 | " int16_t __ret; \\\n" |
| 5836 | " __ret = (int16_t) __builtin_neon_vcvth_n_s16_f16(__s0, __p1); \\\n" |
| 5837 | " __ret; \\\n" |
| 5838 | "})\n" |
| 5839 | "#else\n" |
| 5840 | "#define vcvth_n_s16_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5841 | " float16_t __s0 = __p0; \\\n" |
| 5842 | " int16_t __ret; \\\n" |
| 5843 | " __ret = (int16_t) __builtin_neon_vcvth_n_s16_f16(__s0, __p1); \\\n" |
| 5844 | " __ret; \\\n" |
| 5845 | "})\n" |
| 5846 | "#endif\n" |
| 5847 | "\n" |
| 5848 | "#ifdef __LITTLE_ENDIAN__\n" |
| 5849 | "#define vcvth_n_s32_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5850 | " float16_t __s0 = __p0; \\\n" |
| 5851 | " int32_t __ret; \\\n" |
| 5852 | " __ret = (int32_t) __builtin_neon_vcvth_n_s32_f16(__s0, __p1); \\\n" |
| 5853 | " __ret; \\\n" |
| 5854 | "})\n" |
| 5855 | "#else\n" |
| 5856 | "#define vcvth_n_s32_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5857 | " float16_t __s0 = __p0; \\\n" |
| 5858 | " int32_t __ret; \\\n" |
| 5859 | " __ret = (int32_t) __builtin_neon_vcvth_n_s32_f16(__s0, __p1); \\\n" |
| 5860 | " __ret; \\\n" |
| 5861 | "})\n" |
| 5862 | "#endif\n" |
| 5863 | "\n" |
| 5864 | "#ifdef __LITTLE_ENDIAN__\n" |
| 5865 | "#define vcvth_n_s64_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5866 | " float16_t __s0 = __p0; \\\n" |
| 5867 | " int64_t __ret; \\\n" |
| 5868 | " __ret = (int64_t) __builtin_neon_vcvth_n_s64_f16(__s0, __p1); \\\n" |
| 5869 | " __ret; \\\n" |
| 5870 | "})\n" |
| 5871 | "#else\n" |
| 5872 | "#define vcvth_n_s64_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5873 | " float16_t __s0 = __p0; \\\n" |
| 5874 | " int64_t __ret; \\\n" |
| 5875 | " __ret = (int64_t) __builtin_neon_vcvth_n_s64_f16(__s0, __p1); \\\n" |
| 5876 | " __ret; \\\n" |
| 5877 | "})\n" |
| 5878 | "#endif\n" |
| 5879 | "\n" |
| 5880 | "#ifdef __LITTLE_ENDIAN__\n" |
| 5881 | "#define vcvth_n_u16_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5882 | " float16_t __s0 = __p0; \\\n" |
| 5883 | " uint16_t __ret; \\\n" |
| 5884 | " __ret = (uint16_t) __builtin_neon_vcvth_n_u16_f16(__s0, __p1); \\\n" |
| 5885 | " __ret; \\\n" |
| 5886 | "})\n" |
| 5887 | "#else\n" |
| 5888 | "#define vcvth_n_u16_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5889 | " float16_t __s0 = __p0; \\\n" |
| 5890 | " uint16_t __ret; \\\n" |
| 5891 | " __ret = (uint16_t) __builtin_neon_vcvth_n_u16_f16(__s0, __p1); \\\n" |
| 5892 | " __ret; \\\n" |
| 5893 | "})\n" |
| 5894 | "#endif\n" |
| 5895 | "\n" |
| 5896 | "#ifdef __LITTLE_ENDIAN__\n" |
| 5897 | "#define vcvth_n_u32_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5898 | " float16_t __s0 = __p0; \\\n" |
| 5899 | " uint32_t __ret; \\\n" |
| 5900 | " __ret = (uint32_t) __builtin_neon_vcvth_n_u32_f16(__s0, __p1); \\\n" |
| 5901 | " __ret; \\\n" |
| 5902 | "})\n" |
| 5903 | "#else\n" |
| 5904 | "#define vcvth_n_u32_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5905 | " float16_t __s0 = __p0; \\\n" |
| 5906 | " uint32_t __ret; \\\n" |
| 5907 | " __ret = (uint32_t) __builtin_neon_vcvth_n_u32_f16(__s0, __p1); \\\n" |
| 5908 | " __ret; \\\n" |
| 5909 | "})\n" |
| 5910 | "#endif\n" |
| 5911 | "\n" |
| 5912 | "#ifdef __LITTLE_ENDIAN__\n" |
| 5913 | "#define vcvth_n_u64_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5914 | " float16_t __s0 = __p0; \\\n" |
| 5915 | " uint64_t __ret; \\\n" |
| 5916 | " __ret = (uint64_t) __builtin_neon_vcvth_n_u64_f16(__s0, __p1); \\\n" |
| 5917 | " __ret; \\\n" |
| 5918 | "})\n" |
| 5919 | "#else\n" |
| 5920 | "#define vcvth_n_u64_f16(__p0, __p1) __extension__ ({ \\\n" |
| 5921 | " float16_t __s0 = __p0; \\\n" |
| 5922 | " uint64_t __ret; \\\n" |
| 5923 | " __ret = (uint64_t) __builtin_neon_vcvth_n_u64_f16(__s0, __p1); \\\n" |
| 5924 | " __ret; \\\n" |
| 5925 | "})\n" |
| 5926 | "#endif\n" |
| 5927 | "\n" |
| 5928 | "#ifdef __LITTLE_ENDIAN__\n" |
| 5929 | "#define vcvth_s16_f16(__p0) __extension__ ({ \\\n" |
| 5930 | " float16_t __s0 = __p0; \\\n" |
| 5931 | " int16_t __ret; \\\n" |
| 5932 | " __ret = (int16_t) __builtin_neon_vcvth_s16_f16(__s0); \\\n" |
| 5933 | " __ret; \\\n" |
| 5934 | "})\n" |
| 5935 | "#else\n" |
| 5936 | "#define vcvth_s16_f16(__p0) __extension__ ({ \\\n" |
| 5937 | " float16_t __s0 = __p0; \\\n" |
| 5938 | " int16_t __ret; \\\n" |
| 5939 | " __ret = (int16_t) __builtin_neon_vcvth_s16_f16(__s0); \\\n" |
| 5940 | " __ret; \\\n" |
| 5941 | "})\n" |
| 5942 | "#endif\n" |
| 5943 | "\n" |
| 5944 | "#ifdef __LITTLE_ENDIAN__\n" |
| 5945 | "#define vcvth_s32_f16(__p0) __extension__ ({ \\\n" |
| 5946 | " float16_t __s0 = __p0; \\\n" |
| 5947 | " int32_t __ret; \\\n" |
| 5948 | " __ret = (int32_t) __builtin_neon_vcvth_s32_f16(__s0); \\\n" |
| 5949 | " __ret; \\\n" |
| 5950 | "})\n" |
| 5951 | "#else\n" |
| 5952 | "#define vcvth_s32_f16(__p0) __extension__ ({ \\\n" |
| 5953 | " float16_t __s0 = __p0; \\\n" |
| 5954 | " int32_t __ret; \\\n" |
| 5955 | " __ret = (int32_t) __builtin_neon_vcvth_s32_f16(__s0); \\\n" |
| 5956 | " __ret; \\\n" |
| 5957 | "})\n" |
| 5958 | "#endif\n" |
| 5959 | "\n" |
| 5960 | "#ifdef __LITTLE_ENDIAN__\n" |
| 5961 | "#define vcvth_s64_f16(__p0) __extension__ ({ \\\n" |
| 5962 | " float16_t __s0 = __p0; \\\n" |
| 5963 | " int64_t __ret; \\\n" |
| 5964 | " __ret = (int64_t) __builtin_neon_vcvth_s64_f16(__s0); \\\n" |
| 5965 | " __ret; \\\n" |
| 5966 | "})\n" |
| 5967 | "#else\n" |
| 5968 | "#define vcvth_s64_f16(__p0) __extension__ ({ \\\n" |
| 5969 | " float16_t __s0 = __p0; \\\n" |
| 5970 | " int64_t __ret; \\\n" |
| 5971 | " __ret = (int64_t) __builtin_neon_vcvth_s64_f16(__s0); \\\n" |
| 5972 | " __ret; \\\n" |
| 5973 | "})\n" |
| 5974 | "#endif\n" |
| 5975 | "\n" |
| 5976 | "#ifdef __LITTLE_ENDIAN__\n" |
| 5977 | "#define vcvth_u16_f16(__p0) __extension__ ({ \\\n" |
| 5978 | " float16_t __s0 = __p0; \\\n" |
| 5979 | " uint16_t __ret; \\\n" |
| 5980 | " __ret = (uint16_t) __builtin_neon_vcvth_u16_f16(__s0); \\\n" |
| 5981 | " __ret; \\\n" |
| 5982 | "})\n" |
| 5983 | "#else\n" |
| 5984 | "#define vcvth_u16_f16(__p0) __extension__ ({ \\\n" |
| 5985 | " float16_t __s0 = __p0; \\\n" |
| 5986 | " uint16_t __ret; \\\n" |
| 5987 | " __ret = (uint16_t) __builtin_neon_vcvth_u16_f16(__s0); \\\n" |
| 5988 | " __ret; \\\n" |
| 5989 | "})\n" |
| 5990 | "#endif\n" |
| 5991 | "\n" |
| 5992 | "#ifdef __LITTLE_ENDIAN__\n" |
| 5993 | "#define vcvth_u32_f16(__p0) __extension__ ({ \\\n" |
| 5994 | " float16_t __s0 = __p0; \\\n" |
| 5995 | " uint32_t __ret; \\\n" |
| 5996 | " __ret = (uint32_t) __builtin_neon_vcvth_u32_f16(__s0); \\\n" |
| 5997 | " __ret; \\\n" |
| 5998 | "})\n" |
| 5999 | "#else\n" |
| 6000 | "#define vcvth_u32_f16(__p0) __extension__ ({ \\\n" |
| 6001 | " float16_t __s0 = __p0; \\\n" |
| 6002 | " uint32_t __ret; \\\n" |
| 6003 | " __ret = (uint32_t) __builtin_neon_vcvth_u32_f16(__s0); \\\n" |
| 6004 | " __ret; \\\n" |
| 6005 | "})\n" |
| 6006 | "#endif\n" |
| 6007 | "\n" |
| 6008 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6009 | "#define vcvth_u64_f16(__p0) __extension__ ({ \\\n" |
| 6010 | " float16_t __s0 = __p0; \\\n" |
| 6011 | " uint64_t __ret; \\\n" |
| 6012 | " __ret = (uint64_t) __builtin_neon_vcvth_u64_f16(__s0); \\\n" |
| 6013 | " __ret; \\\n" |
| 6014 | "})\n" |
| 6015 | "#else\n" |
| 6016 | "#define vcvth_u64_f16(__p0) __extension__ ({ \\\n" |
| 6017 | " float16_t __s0 = __p0; \\\n" |
| 6018 | " uint64_t __ret; \\\n" |
| 6019 | " __ret = (uint64_t) __builtin_neon_vcvth_u64_f16(__s0); \\\n" |
| 6020 | " __ret; \\\n" |
| 6021 | "})\n" |
| 6022 | "#endif\n" |
| 6023 | "\n" |
| 6024 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6025 | "#define vcvtah_s16_f16(__p0) __extension__ ({ \\\n" |
| 6026 | " float16_t __s0 = __p0; \\\n" |
| 6027 | " int16_t __ret; \\\n" |
| 6028 | " __ret = (int16_t) __builtin_neon_vcvtah_s16_f16(__s0); \\\n" |
| 6029 | " __ret; \\\n" |
| 6030 | "})\n" |
| 6031 | "#else\n" |
| 6032 | "#define vcvtah_s16_f16(__p0) __extension__ ({ \\\n" |
| 6033 | " float16_t __s0 = __p0; \\\n" |
| 6034 | " int16_t __ret; \\\n" |
| 6035 | " __ret = (int16_t) __builtin_neon_vcvtah_s16_f16(__s0); \\\n" |
| 6036 | " __ret; \\\n" |
| 6037 | "})\n" |
| 6038 | "#endif\n" |
| 6039 | "\n" |
| 6040 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6041 | "#define vcvtah_s32_f16(__p0) __extension__ ({ \\\n" |
| 6042 | " float16_t __s0 = __p0; \\\n" |
| 6043 | " int32_t __ret; \\\n" |
| 6044 | " __ret = (int32_t) __builtin_neon_vcvtah_s32_f16(__s0); \\\n" |
| 6045 | " __ret; \\\n" |
| 6046 | "})\n" |
| 6047 | "#else\n" |
| 6048 | "#define vcvtah_s32_f16(__p0) __extension__ ({ \\\n" |
| 6049 | " float16_t __s0 = __p0; \\\n" |
| 6050 | " int32_t __ret; \\\n" |
| 6051 | " __ret = (int32_t) __builtin_neon_vcvtah_s32_f16(__s0); \\\n" |
| 6052 | " __ret; \\\n" |
| 6053 | "})\n" |
| 6054 | "#endif\n" |
| 6055 | "\n" |
| 6056 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6057 | "#define vcvtah_s64_f16(__p0) __extension__ ({ \\\n" |
| 6058 | " float16_t __s0 = __p0; \\\n" |
| 6059 | " int64_t __ret; \\\n" |
| 6060 | " __ret = (int64_t) __builtin_neon_vcvtah_s64_f16(__s0); \\\n" |
| 6061 | " __ret; \\\n" |
| 6062 | "})\n" |
| 6063 | "#else\n" |
| 6064 | "#define vcvtah_s64_f16(__p0) __extension__ ({ \\\n" |
| 6065 | " float16_t __s0 = __p0; \\\n" |
| 6066 | " int64_t __ret; \\\n" |
| 6067 | " __ret = (int64_t) __builtin_neon_vcvtah_s64_f16(__s0); \\\n" |
| 6068 | " __ret; \\\n" |
| 6069 | "})\n" |
| 6070 | "#endif\n" |
| 6071 | "\n" |
| 6072 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6073 | "#define vcvtah_u16_f16(__p0) __extension__ ({ \\\n" |
| 6074 | " float16_t __s0 = __p0; \\\n" |
| 6075 | " uint16_t __ret; \\\n" |
| 6076 | " __ret = (uint16_t) __builtin_neon_vcvtah_u16_f16(__s0); \\\n" |
| 6077 | " __ret; \\\n" |
| 6078 | "})\n" |
| 6079 | "#else\n" |
| 6080 | "#define vcvtah_u16_f16(__p0) __extension__ ({ \\\n" |
| 6081 | " float16_t __s0 = __p0; \\\n" |
| 6082 | " uint16_t __ret; \\\n" |
| 6083 | " __ret = (uint16_t) __builtin_neon_vcvtah_u16_f16(__s0); \\\n" |
| 6084 | " __ret; \\\n" |
| 6085 | "})\n" |
| 6086 | "#endif\n" |
| 6087 | "\n" |
| 6088 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6089 | "#define vcvtah_u32_f16(__p0) __extension__ ({ \\\n" |
| 6090 | " float16_t __s0 = __p0; \\\n" |
| 6091 | " uint32_t __ret; \\\n" |
| 6092 | " __ret = (uint32_t) __builtin_neon_vcvtah_u32_f16(__s0); \\\n" |
| 6093 | " __ret; \\\n" |
| 6094 | "})\n" |
| 6095 | "#else\n" |
| 6096 | "#define vcvtah_u32_f16(__p0) __extension__ ({ \\\n" |
| 6097 | " float16_t __s0 = __p0; \\\n" |
| 6098 | " uint32_t __ret; \\\n" |
| 6099 | " __ret = (uint32_t) __builtin_neon_vcvtah_u32_f16(__s0); \\\n" |
| 6100 | " __ret; \\\n" |
| 6101 | "})\n" |
| 6102 | "#endif\n" |
| 6103 | "\n" |
| 6104 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6105 | "#define vcvtah_u64_f16(__p0) __extension__ ({ \\\n" |
| 6106 | " float16_t __s0 = __p0; \\\n" |
| 6107 | " uint64_t __ret; \\\n" |
| 6108 | " __ret = (uint64_t) __builtin_neon_vcvtah_u64_f16(__s0); \\\n" |
| 6109 | " __ret; \\\n" |
| 6110 | "})\n" |
| 6111 | "#else\n" |
| 6112 | "#define vcvtah_u64_f16(__p0) __extension__ ({ \\\n" |
| 6113 | " float16_t __s0 = __p0; \\\n" |
| 6114 | " uint64_t __ret; \\\n" |
| 6115 | " __ret = (uint64_t) __builtin_neon_vcvtah_u64_f16(__s0); \\\n" |
| 6116 | " __ret; \\\n" |
| 6117 | "})\n" |
| 6118 | "#endif\n" |
| 6119 | "\n" |
| 6120 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6121 | "__ai float16_t vcvth_f16_u32(uint32_t __p0) {\n" |
| 6122 | " float16_t __ret;\n" |
| 6123 | " __ret = (float16_t) __builtin_neon_vcvth_f16_u32(__p0);\n" |
| 6124 | " return __ret;\n" |
| 6125 | "}\n" |
| 6126 | "#else\n" |
| 6127 | "__ai float16_t vcvth_f16_u32(uint32_t __p0) {\n" |
| 6128 | " float16_t __ret;\n" |
| 6129 | " __ret = (float16_t) __builtin_neon_vcvth_f16_u32(__p0);\n" |
| 6130 | " return __ret;\n" |
| 6131 | "}\n" |
| 6132 | "#endif\n" |
| 6133 | "\n" |
| 6134 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6135 | "__ai float16_t vcvth_f16_u64(uint64_t __p0) {\n" |
| 6136 | " float16_t __ret;\n" |
| 6137 | " __ret = (float16_t) __builtin_neon_vcvth_f16_u64(__p0);\n" |
| 6138 | " return __ret;\n" |
| 6139 | "}\n" |
| 6140 | "#else\n" |
| 6141 | "__ai float16_t vcvth_f16_u64(uint64_t __p0) {\n" |
| 6142 | " float16_t __ret;\n" |
| 6143 | " __ret = (float16_t) __builtin_neon_vcvth_f16_u64(__p0);\n" |
| 6144 | " return __ret;\n" |
| 6145 | "}\n" |
| 6146 | "#endif\n" |
| 6147 | "\n" |
| 6148 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6149 | "__ai float16_t vcvth_f16_u16(uint16_t __p0) {\n" |
| 6150 | " float16_t __ret;\n" |
| 6151 | " __ret = (float16_t) __builtin_neon_vcvth_f16_u16(__p0);\n" |
| 6152 | " return __ret;\n" |
| 6153 | "}\n" |
| 6154 | "#else\n" |
| 6155 | "__ai float16_t vcvth_f16_u16(uint16_t __p0) {\n" |
| 6156 | " float16_t __ret;\n" |
| 6157 | " __ret = (float16_t) __builtin_neon_vcvth_f16_u16(__p0);\n" |
| 6158 | " return __ret;\n" |
| 6159 | "}\n" |
| 6160 | "#endif\n" |
| 6161 | "\n" |
| 6162 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6163 | "__ai float16_t vcvth_f16_s32(int32_t __p0) {\n" |
| 6164 | " float16_t __ret;\n" |
| 6165 | " __ret = (float16_t) __builtin_neon_vcvth_f16_s32(__p0);\n" |
| 6166 | " return __ret;\n" |
| 6167 | "}\n" |
| 6168 | "#else\n" |
| 6169 | "__ai float16_t vcvth_f16_s32(int32_t __p0) {\n" |
| 6170 | " float16_t __ret;\n" |
| 6171 | " __ret = (float16_t) __builtin_neon_vcvth_f16_s32(__p0);\n" |
| 6172 | " return __ret;\n" |
| 6173 | "}\n" |
| 6174 | "#endif\n" |
| 6175 | "\n" |
| 6176 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6177 | "__ai float16_t vcvth_f16_s64(int64_t __p0) {\n" |
| 6178 | " float16_t __ret;\n" |
| 6179 | " __ret = (float16_t) __builtin_neon_vcvth_f16_s64(__p0);\n" |
| 6180 | " return __ret;\n" |
| 6181 | "}\n" |
| 6182 | "#else\n" |
| 6183 | "__ai float16_t vcvth_f16_s64(int64_t __p0) {\n" |
| 6184 | " float16_t __ret;\n" |
| 6185 | " __ret = (float16_t) __builtin_neon_vcvth_f16_s64(__p0);\n" |
| 6186 | " return __ret;\n" |
| 6187 | "}\n" |
| 6188 | "#endif\n" |
| 6189 | "\n" |
| 6190 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6191 | "__ai float16_t vcvth_f16_s16(int16_t __p0) {\n" |
| 6192 | " float16_t __ret;\n" |
| 6193 | " __ret = (float16_t) __builtin_neon_vcvth_f16_s16(__p0);\n" |
| 6194 | " return __ret;\n" |
| 6195 | "}\n" |
| 6196 | "#else\n" |
| 6197 | "__ai float16_t vcvth_f16_s16(int16_t __p0) {\n" |
| 6198 | " float16_t __ret;\n" |
| 6199 | " __ret = (float16_t) __builtin_neon_vcvth_f16_s16(__p0);\n" |
| 6200 | " return __ret;\n" |
| 6201 | "}\n" |
| 6202 | "#endif\n" |
| 6203 | "\n" |
| 6204 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6205 | "#define vcvth_n_f16_u32(__p0, __p1) __extension__ ({ \\\n" |
| 6206 | " uint32_t __s0 = __p0; \\\n" |
| 6207 | " float16_t __ret; \\\n" |
| 6208 | " __ret = (float16_t) __builtin_neon_vcvth_n_f16_u32(__s0, __p1); \\\n" |
| 6209 | " __ret; \\\n" |
| 6210 | "})\n" |
| 6211 | "#else\n" |
| 6212 | "#define vcvth_n_f16_u32(__p0, __p1) __extension__ ({ \\\n" |
| 6213 | " uint32_t __s0 = __p0; \\\n" |
| 6214 | " float16_t __ret; \\\n" |
| 6215 | " __ret = (float16_t) __builtin_neon_vcvth_n_f16_u32(__s0, __p1); \\\n" |
| 6216 | " __ret; \\\n" |
| 6217 | "})\n" |
| 6218 | "#endif\n" |
| 6219 | "\n" |
| 6220 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6221 | "#define vcvth_n_f16_u64(__p0, __p1) __extension__ ({ \\\n" |
| 6222 | " uint64_t __s0 = __p0; \\\n" |
| 6223 | " float16_t __ret; \\\n" |
| 6224 | " __ret = (float16_t) __builtin_neon_vcvth_n_f16_u64(__s0, __p1); \\\n" |
| 6225 | " __ret; \\\n" |
| 6226 | "})\n" |
| 6227 | "#else\n" |
| 6228 | "#define vcvth_n_f16_u64(__p0, __p1) __extension__ ({ \\\n" |
| 6229 | " uint64_t __s0 = __p0; \\\n" |
| 6230 | " float16_t __ret; \\\n" |
| 6231 | " __ret = (float16_t) __builtin_neon_vcvth_n_f16_u64(__s0, __p1); \\\n" |
| 6232 | " __ret; \\\n" |
| 6233 | "})\n" |
| 6234 | "#endif\n" |
| 6235 | "\n" |
| 6236 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6237 | "#define vcvth_n_f16_u16(__p0, __p1) __extension__ ({ \\\n" |
| 6238 | " uint16_t __s0 = __p0; \\\n" |
| 6239 | " float16_t __ret; \\\n" |
| 6240 | " __ret = (float16_t) __builtin_neon_vcvth_n_f16_u16(__s0, __p1); \\\n" |
| 6241 | " __ret; \\\n" |
| 6242 | "})\n" |
| 6243 | "#else\n" |
| 6244 | "#define vcvth_n_f16_u16(__p0, __p1) __extension__ ({ \\\n" |
| 6245 | " uint16_t __s0 = __p0; \\\n" |
| 6246 | " float16_t __ret; \\\n" |
| 6247 | " __ret = (float16_t) __builtin_neon_vcvth_n_f16_u16(__s0, __p1); \\\n" |
| 6248 | " __ret; \\\n" |
| 6249 | "})\n" |
| 6250 | "#endif\n" |
| 6251 | "\n" |
| 6252 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6253 | "#define vcvth_n_f16_s32(__p0, __p1) __extension__ ({ \\\n" |
| 6254 | " int32_t __s0 = __p0; \\\n" |
| 6255 | " float16_t __ret; \\\n" |
| 6256 | " __ret = (float16_t) __builtin_neon_vcvth_n_f16_s32(__s0, __p1); \\\n" |
| 6257 | " __ret; \\\n" |
| 6258 | "})\n" |
| 6259 | "#else\n" |
| 6260 | "#define vcvth_n_f16_s32(__p0, __p1) __extension__ ({ \\\n" |
| 6261 | " int32_t __s0 = __p0; \\\n" |
| 6262 | " float16_t __ret; \\\n" |
| 6263 | " __ret = (float16_t) __builtin_neon_vcvth_n_f16_s32(__s0, __p1); \\\n" |
| 6264 | " __ret; \\\n" |
| 6265 | "})\n" |
| 6266 | "#endif\n" |
| 6267 | "\n" |
| 6268 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6269 | "#define vcvth_n_f16_s64(__p0, __p1) __extension__ ({ \\\n" |
| 6270 | " int64_t __s0 = __p0; \\\n" |
| 6271 | " float16_t __ret; \\\n" |
| 6272 | " __ret = (float16_t) __builtin_neon_vcvth_n_f16_s64(__s0, __p1); \\\n" |
| 6273 | " __ret; \\\n" |
| 6274 | "})\n" |
| 6275 | "#else\n" |
| 6276 | "#define vcvth_n_f16_s64(__p0, __p1) __extension__ ({ \\\n" |
| 6277 | " int64_t __s0 = __p0; \\\n" |
| 6278 | " float16_t __ret; \\\n" |
| 6279 | " __ret = (float16_t) __builtin_neon_vcvth_n_f16_s64(__s0, __p1); \\\n" |
| 6280 | " __ret; \\\n" |
| 6281 | "})\n" |
| 6282 | "#endif\n" |
| 6283 | "\n" |
| 6284 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6285 | "#define vcvth_n_f16_s16(__p0, __p1) __extension__ ({ \\\n" |
| 6286 | " int16_t __s0 = __p0; \\\n" |
| 6287 | " float16_t __ret; \\\n" |
| 6288 | " __ret = (float16_t) __builtin_neon_vcvth_n_f16_s16(__s0, __p1); \\\n" |
| 6289 | " __ret; \\\n" |
| 6290 | "})\n" |
| 6291 | "#else\n" |
| 6292 | "#define vcvth_n_f16_s16(__p0, __p1) __extension__ ({ \\\n" |
| 6293 | " int16_t __s0 = __p0; \\\n" |
| 6294 | " float16_t __ret; \\\n" |
| 6295 | " __ret = (float16_t) __builtin_neon_vcvth_n_f16_s16(__s0, __p1); \\\n" |
| 6296 | " __ret; \\\n" |
| 6297 | "})\n" |
| 6298 | "#endif\n" |
| 6299 | "\n" |
| 6300 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6301 | "#define vcvtmh_s16_f16(__p0) __extension__ ({ \\\n" |
| 6302 | " float16_t __s0 = __p0; \\\n" |
| 6303 | " int16_t __ret; \\\n" |
| 6304 | " __ret = (int16_t) __builtin_neon_vcvtmh_s16_f16(__s0); \\\n" |
| 6305 | " __ret; \\\n" |
| 6306 | "})\n" |
| 6307 | "#else\n" |
| 6308 | "#define vcvtmh_s16_f16(__p0) __extension__ ({ \\\n" |
| 6309 | " float16_t __s0 = __p0; \\\n" |
| 6310 | " int16_t __ret; \\\n" |
| 6311 | " __ret = (int16_t) __builtin_neon_vcvtmh_s16_f16(__s0); \\\n" |
| 6312 | " __ret; \\\n" |
| 6313 | "})\n" |
| 6314 | "#endif\n" |
| 6315 | "\n" |
| 6316 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6317 | "#define vcvtmh_s32_f16(__p0) __extension__ ({ \\\n" |
| 6318 | " float16_t __s0 = __p0; \\\n" |
| 6319 | " int32_t __ret; \\\n" |
| 6320 | " __ret = (int32_t) __builtin_neon_vcvtmh_s32_f16(__s0); \\\n" |
| 6321 | " __ret; \\\n" |
| 6322 | "})\n" |
| 6323 | "#else\n" |
| 6324 | "#define vcvtmh_s32_f16(__p0) __extension__ ({ \\\n" |
| 6325 | " float16_t __s0 = __p0; \\\n" |
| 6326 | " int32_t __ret; \\\n" |
| 6327 | " __ret = (int32_t) __builtin_neon_vcvtmh_s32_f16(__s0); \\\n" |
| 6328 | " __ret; \\\n" |
| 6329 | "})\n" |
| 6330 | "#endif\n" |
| 6331 | "\n" |
| 6332 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6333 | "#define vcvtmh_s64_f16(__p0) __extension__ ({ \\\n" |
| 6334 | " float16_t __s0 = __p0; \\\n" |
| 6335 | " int64_t __ret; \\\n" |
| 6336 | " __ret = (int64_t) __builtin_neon_vcvtmh_s64_f16(__s0); \\\n" |
| 6337 | " __ret; \\\n" |
| 6338 | "})\n" |
| 6339 | "#else\n" |
| 6340 | "#define vcvtmh_s64_f16(__p0) __extension__ ({ \\\n" |
| 6341 | " float16_t __s0 = __p0; \\\n" |
| 6342 | " int64_t __ret; \\\n" |
| 6343 | " __ret = (int64_t) __builtin_neon_vcvtmh_s64_f16(__s0); \\\n" |
| 6344 | " __ret; \\\n" |
| 6345 | "})\n" |
| 6346 | "#endif\n" |
| 6347 | "\n" |
| 6348 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6349 | "#define vcvtmh_u16_f16(__p0) __extension__ ({ \\\n" |
| 6350 | " float16_t __s0 = __p0; \\\n" |
| 6351 | " uint16_t __ret; \\\n" |
| 6352 | " __ret = (uint16_t) __builtin_neon_vcvtmh_u16_f16(__s0); \\\n" |
| 6353 | " __ret; \\\n" |
| 6354 | "})\n" |
| 6355 | "#else\n" |
| 6356 | "#define vcvtmh_u16_f16(__p0) __extension__ ({ \\\n" |
| 6357 | " float16_t __s0 = __p0; \\\n" |
| 6358 | " uint16_t __ret; \\\n" |
| 6359 | " __ret = (uint16_t) __builtin_neon_vcvtmh_u16_f16(__s0); \\\n" |
| 6360 | " __ret; \\\n" |
| 6361 | "})\n" |
| 6362 | "#endif\n" |
| 6363 | "\n" |
| 6364 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6365 | "#define vcvtmh_u32_f16(__p0) __extension__ ({ \\\n" |
| 6366 | " float16_t __s0 = __p0; \\\n" |
| 6367 | " uint32_t __ret; \\\n" |
| 6368 | " __ret = (uint32_t) __builtin_neon_vcvtmh_u32_f16(__s0); \\\n" |
| 6369 | " __ret; \\\n" |
| 6370 | "})\n" |
| 6371 | "#else\n" |
| 6372 | "#define vcvtmh_u32_f16(__p0) __extension__ ({ \\\n" |
| 6373 | " float16_t __s0 = __p0; \\\n" |
| 6374 | " uint32_t __ret; \\\n" |
| 6375 | " __ret = (uint32_t) __builtin_neon_vcvtmh_u32_f16(__s0); \\\n" |
| 6376 | " __ret; \\\n" |
| 6377 | "})\n" |
| 6378 | "#endif\n" |
| 6379 | "\n" |
| 6380 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6381 | "#define vcvtmh_u64_f16(__p0) __extension__ ({ \\\n" |
| 6382 | " float16_t __s0 = __p0; \\\n" |
| 6383 | " uint64_t __ret; \\\n" |
| 6384 | " __ret = (uint64_t) __builtin_neon_vcvtmh_u64_f16(__s0); \\\n" |
| 6385 | " __ret; \\\n" |
| 6386 | "})\n" |
| 6387 | "#else\n" |
| 6388 | "#define vcvtmh_u64_f16(__p0) __extension__ ({ \\\n" |
| 6389 | " float16_t __s0 = __p0; \\\n" |
| 6390 | " uint64_t __ret; \\\n" |
| 6391 | " __ret = (uint64_t) __builtin_neon_vcvtmh_u64_f16(__s0); \\\n" |
| 6392 | " __ret; \\\n" |
| 6393 | "})\n" |
| 6394 | "#endif\n" |
| 6395 | "\n" |
| 6396 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6397 | "#define vcvtnh_s16_f16(__p0) __extension__ ({ \\\n" |
| 6398 | " float16_t __s0 = __p0; \\\n" |
| 6399 | " int16_t __ret; \\\n" |
| 6400 | " __ret = (int16_t) __builtin_neon_vcvtnh_s16_f16(__s0); \\\n" |
| 6401 | " __ret; \\\n" |
| 6402 | "})\n" |
| 6403 | "#else\n" |
| 6404 | "#define vcvtnh_s16_f16(__p0) __extension__ ({ \\\n" |
| 6405 | " float16_t __s0 = __p0; \\\n" |
| 6406 | " int16_t __ret; \\\n" |
| 6407 | " __ret = (int16_t) __builtin_neon_vcvtnh_s16_f16(__s0); \\\n" |
| 6408 | " __ret; \\\n" |
| 6409 | "})\n" |
| 6410 | "#endif\n" |
| 6411 | "\n" |
| 6412 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6413 | "#define vcvtnh_s32_f16(__p0) __extension__ ({ \\\n" |
| 6414 | " float16_t __s0 = __p0; \\\n" |
| 6415 | " int32_t __ret; \\\n" |
| 6416 | " __ret = (int32_t) __builtin_neon_vcvtnh_s32_f16(__s0); \\\n" |
| 6417 | " __ret; \\\n" |
| 6418 | "})\n" |
| 6419 | "#else\n" |
| 6420 | "#define vcvtnh_s32_f16(__p0) __extension__ ({ \\\n" |
| 6421 | " float16_t __s0 = __p0; \\\n" |
| 6422 | " int32_t __ret; \\\n" |
| 6423 | " __ret = (int32_t) __builtin_neon_vcvtnh_s32_f16(__s0); \\\n" |
| 6424 | " __ret; \\\n" |
| 6425 | "})\n" |
| 6426 | "#endif\n" |
| 6427 | "\n" |
| 6428 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6429 | "#define vcvtnh_s64_f16(__p0) __extension__ ({ \\\n" |
| 6430 | " float16_t __s0 = __p0; \\\n" |
| 6431 | " int64_t __ret; \\\n" |
| 6432 | " __ret = (int64_t) __builtin_neon_vcvtnh_s64_f16(__s0); \\\n" |
| 6433 | " __ret; \\\n" |
| 6434 | "})\n" |
| 6435 | "#else\n" |
| 6436 | "#define vcvtnh_s64_f16(__p0) __extension__ ({ \\\n" |
| 6437 | " float16_t __s0 = __p0; \\\n" |
| 6438 | " int64_t __ret; \\\n" |
| 6439 | " __ret = (int64_t) __builtin_neon_vcvtnh_s64_f16(__s0); \\\n" |
| 6440 | " __ret; \\\n" |
| 6441 | "})\n" |
| 6442 | "#endif\n" |
| 6443 | "\n" |
| 6444 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6445 | "#define vcvtnh_u16_f16(__p0) __extension__ ({ \\\n" |
| 6446 | " float16_t __s0 = __p0; \\\n" |
| 6447 | " uint16_t __ret; \\\n" |
| 6448 | " __ret = (uint16_t) __builtin_neon_vcvtnh_u16_f16(__s0); \\\n" |
| 6449 | " __ret; \\\n" |
| 6450 | "})\n" |
| 6451 | "#else\n" |
| 6452 | "#define vcvtnh_u16_f16(__p0) __extension__ ({ \\\n" |
| 6453 | " float16_t __s0 = __p0; \\\n" |
| 6454 | " uint16_t __ret; \\\n" |
| 6455 | " __ret = (uint16_t) __builtin_neon_vcvtnh_u16_f16(__s0); \\\n" |
| 6456 | " __ret; \\\n" |
| 6457 | "})\n" |
| 6458 | "#endif\n" |
| 6459 | "\n" |
| 6460 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6461 | "#define vcvtnh_u32_f16(__p0) __extension__ ({ \\\n" |
| 6462 | " float16_t __s0 = __p0; \\\n" |
| 6463 | " uint32_t __ret; \\\n" |
| 6464 | " __ret = (uint32_t) __builtin_neon_vcvtnh_u32_f16(__s0); \\\n" |
| 6465 | " __ret; \\\n" |
| 6466 | "})\n" |
| 6467 | "#else\n" |
| 6468 | "#define vcvtnh_u32_f16(__p0) __extension__ ({ \\\n" |
| 6469 | " float16_t __s0 = __p0; \\\n" |
| 6470 | " uint32_t __ret; \\\n" |
| 6471 | " __ret = (uint32_t) __builtin_neon_vcvtnh_u32_f16(__s0); \\\n" |
| 6472 | " __ret; \\\n" |
| 6473 | "})\n" |
| 6474 | "#endif\n" |
| 6475 | "\n" |
| 6476 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6477 | "#define vcvtnh_u64_f16(__p0) __extension__ ({ \\\n" |
| 6478 | " float16_t __s0 = __p0; \\\n" |
| 6479 | " uint64_t __ret; \\\n" |
| 6480 | " __ret = (uint64_t) __builtin_neon_vcvtnh_u64_f16(__s0); \\\n" |
| 6481 | " __ret; \\\n" |
| 6482 | "})\n" |
| 6483 | "#else\n" |
| 6484 | "#define vcvtnh_u64_f16(__p0) __extension__ ({ \\\n" |
| 6485 | " float16_t __s0 = __p0; \\\n" |
| 6486 | " uint64_t __ret; \\\n" |
| 6487 | " __ret = (uint64_t) __builtin_neon_vcvtnh_u64_f16(__s0); \\\n" |
| 6488 | " __ret; \\\n" |
| 6489 | "})\n" |
| 6490 | "#endif\n" |
| 6491 | "\n" |
| 6492 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6493 | "#define vcvtph_s16_f16(__p0) __extension__ ({ \\\n" |
| 6494 | " float16_t __s0 = __p0; \\\n" |
| 6495 | " int16_t __ret; \\\n" |
| 6496 | " __ret = (int16_t) __builtin_neon_vcvtph_s16_f16(__s0); \\\n" |
| 6497 | " __ret; \\\n" |
| 6498 | "})\n" |
| 6499 | "#else\n" |
| 6500 | "#define vcvtph_s16_f16(__p0) __extension__ ({ \\\n" |
| 6501 | " float16_t __s0 = __p0; \\\n" |
| 6502 | " int16_t __ret; \\\n" |
| 6503 | " __ret = (int16_t) __builtin_neon_vcvtph_s16_f16(__s0); \\\n" |
| 6504 | " __ret; \\\n" |
| 6505 | "})\n" |
| 6506 | "#endif\n" |
| 6507 | "\n" |
| 6508 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6509 | "#define vcvtph_s32_f16(__p0) __extension__ ({ \\\n" |
| 6510 | " float16_t __s0 = __p0; \\\n" |
| 6511 | " int32_t __ret; \\\n" |
| 6512 | " __ret = (int32_t) __builtin_neon_vcvtph_s32_f16(__s0); \\\n" |
| 6513 | " __ret; \\\n" |
| 6514 | "})\n" |
| 6515 | "#else\n" |
| 6516 | "#define vcvtph_s32_f16(__p0) __extension__ ({ \\\n" |
| 6517 | " float16_t __s0 = __p0; \\\n" |
| 6518 | " int32_t __ret; \\\n" |
| 6519 | " __ret = (int32_t) __builtin_neon_vcvtph_s32_f16(__s0); \\\n" |
| 6520 | " __ret; \\\n" |
| 6521 | "})\n" |
| 6522 | "#endif\n" |
| 6523 | "\n" |
| 6524 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6525 | "#define vcvtph_s64_f16(__p0) __extension__ ({ \\\n" |
| 6526 | " float16_t __s0 = __p0; \\\n" |
| 6527 | " int64_t __ret; \\\n" |
| 6528 | " __ret = (int64_t) __builtin_neon_vcvtph_s64_f16(__s0); \\\n" |
| 6529 | " __ret; \\\n" |
| 6530 | "})\n" |
| 6531 | "#else\n" |
| 6532 | "#define vcvtph_s64_f16(__p0) __extension__ ({ \\\n" |
| 6533 | " float16_t __s0 = __p0; \\\n" |
| 6534 | " int64_t __ret; \\\n" |
| 6535 | " __ret = (int64_t) __builtin_neon_vcvtph_s64_f16(__s0); \\\n" |
| 6536 | " __ret; \\\n" |
| 6537 | "})\n" |
| 6538 | "#endif\n" |
| 6539 | "\n" |
| 6540 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6541 | "#define vcvtph_u16_f16(__p0) __extension__ ({ \\\n" |
| 6542 | " float16_t __s0 = __p0; \\\n" |
| 6543 | " uint16_t __ret; \\\n" |
| 6544 | " __ret = (uint16_t) __builtin_neon_vcvtph_u16_f16(__s0); \\\n" |
| 6545 | " __ret; \\\n" |
| 6546 | "})\n" |
| 6547 | "#else\n" |
| 6548 | "#define vcvtph_u16_f16(__p0) __extension__ ({ \\\n" |
| 6549 | " float16_t __s0 = __p0; \\\n" |
| 6550 | " uint16_t __ret; \\\n" |
| 6551 | " __ret = (uint16_t) __builtin_neon_vcvtph_u16_f16(__s0); \\\n" |
| 6552 | " __ret; \\\n" |
| 6553 | "})\n" |
| 6554 | "#endif\n" |
| 6555 | "\n" |
| 6556 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6557 | "#define vcvtph_u32_f16(__p0) __extension__ ({ \\\n" |
| 6558 | " float16_t __s0 = __p0; \\\n" |
| 6559 | " uint32_t __ret; \\\n" |
| 6560 | " __ret = (uint32_t) __builtin_neon_vcvtph_u32_f16(__s0); \\\n" |
| 6561 | " __ret; \\\n" |
| 6562 | "})\n" |
| 6563 | "#else\n" |
| 6564 | "#define vcvtph_u32_f16(__p0) __extension__ ({ \\\n" |
| 6565 | " float16_t __s0 = __p0; \\\n" |
| 6566 | " uint32_t __ret; \\\n" |
| 6567 | " __ret = (uint32_t) __builtin_neon_vcvtph_u32_f16(__s0); \\\n" |
| 6568 | " __ret; \\\n" |
| 6569 | "})\n" |
| 6570 | "#endif\n" |
| 6571 | "\n" |
| 6572 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6573 | "#define vcvtph_u64_f16(__p0) __extension__ ({ \\\n" |
| 6574 | " float16_t __s0 = __p0; \\\n" |
| 6575 | " uint64_t __ret; \\\n" |
| 6576 | " __ret = (uint64_t) __builtin_neon_vcvtph_u64_f16(__s0); \\\n" |
| 6577 | " __ret; \\\n" |
| 6578 | "})\n" |
| 6579 | "#else\n" |
| 6580 | "#define vcvtph_u64_f16(__p0) __extension__ ({ \\\n" |
| 6581 | " float16_t __s0 = __p0; \\\n" |
| 6582 | " uint64_t __ret; \\\n" |
| 6583 | " __ret = (uint64_t) __builtin_neon_vcvtph_u64_f16(__s0); \\\n" |
| 6584 | " __ret; \\\n" |
| 6585 | "})\n" |
| 6586 | "#endif\n" |
| 6587 | "\n" |
| 6588 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6589 | "#define vdivh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 6590 | " float16_t __s0 = __p0; \\\n" |
| 6591 | " float16_t __s1 = __p1; \\\n" |
| 6592 | " float16_t __ret; \\\n" |
| 6593 | " __ret = (float16_t) __builtin_neon_vdivh_f16(__s0, __s1); \\\n" |
| 6594 | " __ret; \\\n" |
| 6595 | "})\n" |
| 6596 | "#else\n" |
| 6597 | "#define vdivh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 6598 | " float16_t __s0 = __p0; \\\n" |
| 6599 | " float16_t __s1 = __p1; \\\n" |
| 6600 | " float16_t __ret; \\\n" |
| 6601 | " __ret = (float16_t) __builtin_neon_vdivh_f16(__s0, __s1); \\\n" |
| 6602 | " __ret; \\\n" |
| 6603 | "})\n" |
| 6604 | "#endif\n" |
| 6605 | "\n" |
| 6606 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6607 | "#define vfmah_f16(__p0, __p1, __p2) __extension__ ({ \\\n" |
| 6608 | " float16_t __s0 = __p0; \\\n" |
| 6609 | " float16_t __s1 = __p1; \\\n" |
| 6610 | " float16_t __s2 = __p2; \\\n" |
| 6611 | " float16_t __ret; \\\n" |
| 6612 | " __ret = (float16_t) __builtin_neon_vfmah_f16(__s0, __s1, __s2); \\\n" |
| 6613 | " __ret; \\\n" |
| 6614 | "})\n" |
| 6615 | "#else\n" |
| 6616 | "#define vfmah_f16(__p0, __p1, __p2) __extension__ ({ \\\n" |
| 6617 | " float16_t __s0 = __p0; \\\n" |
| 6618 | " float16_t __s1 = __p1; \\\n" |
| 6619 | " float16_t __s2 = __p2; \\\n" |
| 6620 | " float16_t __ret; \\\n" |
| 6621 | " __ret = (float16_t) __builtin_neon_vfmah_f16(__s0, __s1, __s2); \\\n" |
| 6622 | " __ret; \\\n" |
| 6623 | "})\n" |
| 6624 | "#endif\n" |
| 6625 | "\n" |
| 6626 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6627 | "#define vfmsh_f16(__p0, __p1, __p2) __extension__ ({ \\\n" |
| 6628 | " float16_t __s0 = __p0; \\\n" |
| 6629 | " float16_t __s1 = __p1; \\\n" |
| 6630 | " float16_t __s2 = __p2; \\\n" |
| 6631 | " float16_t __ret; \\\n" |
| 6632 | " __ret = (float16_t) __builtin_neon_vfmsh_f16(__s0, __s1, __s2); \\\n" |
| 6633 | " __ret; \\\n" |
| 6634 | "})\n" |
| 6635 | "#else\n" |
| 6636 | "#define vfmsh_f16(__p0, __p1, __p2) __extension__ ({ \\\n" |
| 6637 | " float16_t __s0 = __p0; \\\n" |
| 6638 | " float16_t __s1 = __p1; \\\n" |
| 6639 | " float16_t __s2 = __p2; \\\n" |
| 6640 | " float16_t __ret; \\\n" |
| 6641 | " __ret = (float16_t) __builtin_neon_vfmsh_f16(__s0, __s1, __s2); \\\n" |
| 6642 | " __ret; \\\n" |
| 6643 | "})\n" |
| 6644 | "#endif\n" |
| 6645 | "\n" |
| 6646 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6647 | "#define vmaxh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 6648 | " float16_t __s0 = __p0; \\\n" |
| 6649 | " float16_t __s1 = __p1; \\\n" |
| 6650 | " float16_t __ret; \\\n" |
| 6651 | " __ret = (float16_t) __builtin_neon_vmaxh_f16(__s0, __s1); \\\n" |
| 6652 | " __ret; \\\n" |
| 6653 | "})\n" |
| 6654 | "#else\n" |
| 6655 | "#define vmaxh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 6656 | " float16_t __s0 = __p0; \\\n" |
| 6657 | " float16_t __s1 = __p1; \\\n" |
| 6658 | " float16_t __ret; \\\n" |
| 6659 | " __ret = (float16_t) __builtin_neon_vmaxh_f16(__s0, __s1); \\\n" |
| 6660 | " __ret; \\\n" |
| 6661 | "})\n" |
| 6662 | "#endif\n" |
| 6663 | "\n" |
| 6664 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6665 | "#define vmaxnmh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 6666 | " float16_t __s0 = __p0; \\\n" |
| 6667 | " float16_t __s1 = __p1; \\\n" |
| 6668 | " float16_t __ret; \\\n" |
| 6669 | " __ret = (float16_t) __builtin_neon_vmaxnmh_f16(__s0, __s1); \\\n" |
| 6670 | " __ret; \\\n" |
| 6671 | "})\n" |
| 6672 | "#else\n" |
| 6673 | "#define vmaxnmh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 6674 | " float16_t __s0 = __p0; \\\n" |
| 6675 | " float16_t __s1 = __p1; \\\n" |
| 6676 | " float16_t __ret; \\\n" |
| 6677 | " __ret = (float16_t) __builtin_neon_vmaxnmh_f16(__s0, __s1); \\\n" |
| 6678 | " __ret; \\\n" |
| 6679 | "})\n" |
| 6680 | "#endif\n" |
| 6681 | "\n" |
| 6682 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6683 | "#define vminh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 6684 | " float16_t __s0 = __p0; \\\n" |
| 6685 | " float16_t __s1 = __p1; \\\n" |
| 6686 | " float16_t __ret; \\\n" |
| 6687 | " __ret = (float16_t) __builtin_neon_vminh_f16(__s0, __s1); \\\n" |
| 6688 | " __ret; \\\n" |
| 6689 | "})\n" |
| 6690 | "#else\n" |
| 6691 | "#define vminh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 6692 | " float16_t __s0 = __p0; \\\n" |
| 6693 | " float16_t __s1 = __p1; \\\n" |
| 6694 | " float16_t __ret; \\\n" |
| 6695 | " __ret = (float16_t) __builtin_neon_vminh_f16(__s0, __s1); \\\n" |
| 6696 | " __ret; \\\n" |
| 6697 | "})\n" |
| 6698 | "#endif\n" |
| 6699 | "\n" |
| 6700 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6701 | "#define vminnmh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 6702 | " float16_t __s0 = __p0; \\\n" |
| 6703 | " float16_t __s1 = __p1; \\\n" |
| 6704 | " float16_t __ret; \\\n" |
| 6705 | " __ret = (float16_t) __builtin_neon_vminnmh_f16(__s0, __s1); \\\n" |
| 6706 | " __ret; \\\n" |
| 6707 | "})\n" |
| 6708 | "#else\n" |
| 6709 | "#define vminnmh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 6710 | " float16_t __s0 = __p0; \\\n" |
| 6711 | " float16_t __s1 = __p1; \\\n" |
| 6712 | " float16_t __ret; \\\n" |
| 6713 | " __ret = (float16_t) __builtin_neon_vminnmh_f16(__s0, __s1); \\\n" |
| 6714 | " __ret; \\\n" |
| 6715 | "})\n" |
| 6716 | "#endif\n" |
| 6717 | "\n" |
| 6718 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6719 | "#define vmulh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 6720 | " float16_t __s0 = __p0; \\\n" |
| 6721 | " float16_t __s1 = __p1; \\\n" |
| 6722 | " float16_t __ret; \\\n" |
| 6723 | " __ret = (float16_t) __builtin_neon_vmulh_f16(__s0, __s1); \\\n" |
| 6724 | " __ret; \\\n" |
| 6725 | "})\n" |
| 6726 | "#else\n" |
| 6727 | "#define vmulh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 6728 | " float16_t __s0 = __p0; \\\n" |
| 6729 | " float16_t __s1 = __p1; \\\n" |
| 6730 | " float16_t __ret; \\\n" |
| 6731 | " __ret = (float16_t) __builtin_neon_vmulh_f16(__s0, __s1); \\\n" |
| 6732 | " __ret; \\\n" |
| 6733 | "})\n" |
| 6734 | "#endif\n" |
| 6735 | "\n" |
| 6736 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6737 | "#define vmulxh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 6738 | " float16_t __s0 = __p0; \\\n" |
| 6739 | " float16_t __s1 = __p1; \\\n" |
| 6740 | " float16_t __ret; \\\n" |
| 6741 | " __ret = (float16_t) __builtin_neon_vmulxh_f16(__s0, __s1); \\\n" |
| 6742 | " __ret; \\\n" |
| 6743 | "})\n" |
| 6744 | "#else\n" |
| 6745 | "#define vmulxh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 6746 | " float16_t __s0 = __p0; \\\n" |
| 6747 | " float16_t __s1 = __p1; \\\n" |
| 6748 | " float16_t __ret; \\\n" |
| 6749 | " __ret = (float16_t) __builtin_neon_vmulxh_f16(__s0, __s1); \\\n" |
| 6750 | " __ret; \\\n" |
| 6751 | "})\n" |
| 6752 | "#endif\n" |
| 6753 | "\n" |
| 6754 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6755 | "#define vnegh_f16(__p0) __extension__ ({ \\\n" |
| 6756 | " float16_t __s0 = __p0; \\\n" |
| 6757 | " float16_t __ret; \\\n" |
| 6758 | " __ret = (float16_t) __builtin_neon_vnegh_f16(__s0); \\\n" |
| 6759 | " __ret; \\\n" |
| 6760 | "})\n" |
| 6761 | "#else\n" |
| 6762 | "#define vnegh_f16(__p0) __extension__ ({ \\\n" |
| 6763 | " float16_t __s0 = __p0; \\\n" |
| 6764 | " float16_t __ret; \\\n" |
| 6765 | " __ret = (float16_t) __builtin_neon_vnegh_f16(__s0); \\\n" |
| 6766 | " __ret; \\\n" |
| 6767 | "})\n" |
| 6768 | "#endif\n" |
| 6769 | "\n" |
| 6770 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6771 | "#define vrecpeh_f16(__p0) __extension__ ({ \\\n" |
| 6772 | " float16_t __s0 = __p0; \\\n" |
| 6773 | " float16_t __ret; \\\n" |
| 6774 | " __ret = (float16_t) __builtin_neon_vrecpeh_f16(__s0); \\\n" |
| 6775 | " __ret; \\\n" |
| 6776 | "})\n" |
| 6777 | "#else\n" |
| 6778 | "#define vrecpeh_f16(__p0) __extension__ ({ \\\n" |
| 6779 | " float16_t __s0 = __p0; \\\n" |
| 6780 | " float16_t __ret; \\\n" |
| 6781 | " __ret = (float16_t) __builtin_neon_vrecpeh_f16(__s0); \\\n" |
| 6782 | " __ret; \\\n" |
| 6783 | "})\n" |
| 6784 | "#endif\n" |
| 6785 | "\n" |
| 6786 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6787 | "#define vrecpsh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 6788 | " float16_t __s0 = __p0; \\\n" |
| 6789 | " float16_t __s1 = __p1; \\\n" |
| 6790 | " float16_t __ret; \\\n" |
| 6791 | " __ret = (float16_t) __builtin_neon_vrecpsh_f16(__s0, __s1); \\\n" |
| 6792 | " __ret; \\\n" |
| 6793 | "})\n" |
| 6794 | "#else\n" |
| 6795 | "#define vrecpsh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 6796 | " float16_t __s0 = __p0; \\\n" |
| 6797 | " float16_t __s1 = __p1; \\\n" |
| 6798 | " float16_t __ret; \\\n" |
| 6799 | " __ret = (float16_t) __builtin_neon_vrecpsh_f16(__s0, __s1); \\\n" |
| 6800 | " __ret; \\\n" |
| 6801 | "})\n" |
| 6802 | "#endif\n" |
| 6803 | "\n" |
| 6804 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6805 | "#define vrecpxh_f16(__p0) __extension__ ({ \\\n" |
| 6806 | " float16_t __s0 = __p0; \\\n" |
| 6807 | " float16_t __ret; \\\n" |
| 6808 | " __ret = (float16_t) __builtin_neon_vrecpxh_f16(__s0); \\\n" |
| 6809 | " __ret; \\\n" |
| 6810 | "})\n" |
| 6811 | "#else\n" |
| 6812 | "#define vrecpxh_f16(__p0) __extension__ ({ \\\n" |
| 6813 | " float16_t __s0 = __p0; \\\n" |
| 6814 | " float16_t __ret; \\\n" |
| 6815 | " __ret = (float16_t) __builtin_neon_vrecpxh_f16(__s0); \\\n" |
| 6816 | " __ret; \\\n" |
| 6817 | "})\n" |
| 6818 | "#endif\n" |
| 6819 | "\n" |
| 6820 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6821 | "#define vrndh_f16(__p0) __extension__ ({ \\\n" |
| 6822 | " float16_t __s0 = __p0; \\\n" |
| 6823 | " float16_t __ret; \\\n" |
| 6824 | " __ret = (float16_t) __builtin_neon_vrndh_f16(__s0); \\\n" |
| 6825 | " __ret; \\\n" |
| 6826 | "})\n" |
| 6827 | "#else\n" |
| 6828 | "#define vrndh_f16(__p0) __extension__ ({ \\\n" |
| 6829 | " float16_t __s0 = __p0; \\\n" |
| 6830 | " float16_t __ret; \\\n" |
| 6831 | " __ret = (float16_t) __builtin_neon_vrndh_f16(__s0); \\\n" |
| 6832 | " __ret; \\\n" |
| 6833 | "})\n" |
| 6834 | "#endif\n" |
| 6835 | "\n" |
| 6836 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6837 | "#define vrndah_f16(__p0) __extension__ ({ \\\n" |
| 6838 | " float16_t __s0 = __p0; \\\n" |
| 6839 | " float16_t __ret; \\\n" |
| 6840 | " __ret = (float16_t) __builtin_neon_vrndah_f16(__s0); \\\n" |
| 6841 | " __ret; \\\n" |
| 6842 | "})\n" |
| 6843 | "#else\n" |
| 6844 | "#define vrndah_f16(__p0) __extension__ ({ \\\n" |
| 6845 | " float16_t __s0 = __p0; \\\n" |
| 6846 | " float16_t __ret; \\\n" |
| 6847 | " __ret = (float16_t) __builtin_neon_vrndah_f16(__s0); \\\n" |
| 6848 | " __ret; \\\n" |
| 6849 | "})\n" |
| 6850 | "#endif\n" |
| 6851 | "\n" |
| 6852 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6853 | "#define vrndih_f16(__p0) __extension__ ({ \\\n" |
| 6854 | " float16_t __s0 = __p0; \\\n" |
| 6855 | " float16_t __ret; \\\n" |
| 6856 | " __ret = (float16_t) __builtin_neon_vrndih_f16(__s0); \\\n" |
| 6857 | " __ret; \\\n" |
| 6858 | "})\n" |
| 6859 | "#else\n" |
| 6860 | "#define vrndih_f16(__p0) __extension__ ({ \\\n" |
| 6861 | " float16_t __s0 = __p0; \\\n" |
| 6862 | " float16_t __ret; \\\n" |
| 6863 | " __ret = (float16_t) __builtin_neon_vrndih_f16(__s0); \\\n" |
| 6864 | " __ret; \\\n" |
| 6865 | "})\n" |
| 6866 | "#endif\n" |
| 6867 | "\n" |
| 6868 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6869 | "#define vrndmh_f16(__p0) __extension__ ({ \\\n" |
| 6870 | " float16_t __s0 = __p0; \\\n" |
| 6871 | " float16_t __ret; \\\n" |
| 6872 | " __ret = (float16_t) __builtin_neon_vrndmh_f16(__s0); \\\n" |
| 6873 | " __ret; \\\n" |
| 6874 | "})\n" |
| 6875 | "#else\n" |
| 6876 | "#define vrndmh_f16(__p0) __extension__ ({ \\\n" |
| 6877 | " float16_t __s0 = __p0; \\\n" |
| 6878 | " float16_t __ret; \\\n" |
| 6879 | " __ret = (float16_t) __builtin_neon_vrndmh_f16(__s0); \\\n" |
| 6880 | " __ret; \\\n" |
| 6881 | "})\n" |
| 6882 | "#endif\n" |
| 6883 | "\n" |
| 6884 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6885 | "#define vrndnh_f16(__p0) __extension__ ({ \\\n" |
| 6886 | " float16_t __s0 = __p0; \\\n" |
| 6887 | " float16_t __ret; \\\n" |
| 6888 | " __ret = (float16_t) __builtin_neon_vrndnh_f16(__s0); \\\n" |
| 6889 | " __ret; \\\n" |
| 6890 | "})\n" |
| 6891 | "#else\n" |
| 6892 | "#define vrndnh_f16(__p0) __extension__ ({ \\\n" |
| 6893 | " float16_t __s0 = __p0; \\\n" |
| 6894 | " float16_t __ret; \\\n" |
| 6895 | " __ret = (float16_t) __builtin_neon_vrndnh_f16(__s0); \\\n" |
| 6896 | " __ret; \\\n" |
| 6897 | "})\n" |
| 6898 | "#endif\n" |
| 6899 | "\n" |
| 6900 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6901 | "#define vrndph_f16(__p0) __extension__ ({ \\\n" |
| 6902 | " float16_t __s0 = __p0; \\\n" |
| 6903 | " float16_t __ret; \\\n" |
| 6904 | " __ret = (float16_t) __builtin_neon_vrndph_f16(__s0); \\\n" |
| 6905 | " __ret; \\\n" |
| 6906 | "})\n" |
| 6907 | "#else\n" |
| 6908 | "#define vrndph_f16(__p0) __extension__ ({ \\\n" |
| 6909 | " float16_t __s0 = __p0; \\\n" |
| 6910 | " float16_t __ret; \\\n" |
| 6911 | " __ret = (float16_t) __builtin_neon_vrndph_f16(__s0); \\\n" |
| 6912 | " __ret; \\\n" |
| 6913 | "})\n" |
| 6914 | "#endif\n" |
| 6915 | "\n" |
| 6916 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6917 | "#define vrndxh_f16(__p0) __extension__ ({ \\\n" |
| 6918 | " float16_t __s0 = __p0; \\\n" |
| 6919 | " float16_t __ret; \\\n" |
| 6920 | " __ret = (float16_t) __builtin_neon_vrndxh_f16(__s0); \\\n" |
| 6921 | " __ret; \\\n" |
| 6922 | "})\n" |
| 6923 | "#else\n" |
| 6924 | "#define vrndxh_f16(__p0) __extension__ ({ \\\n" |
| 6925 | " float16_t __s0 = __p0; \\\n" |
| 6926 | " float16_t __ret; \\\n" |
| 6927 | " __ret = (float16_t) __builtin_neon_vrndxh_f16(__s0); \\\n" |
| 6928 | " __ret; \\\n" |
| 6929 | "})\n" |
| 6930 | "#endif\n" |
| 6931 | "\n" |
| 6932 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6933 | "#define vrsqrteh_f16(__p0) __extension__ ({ \\\n" |
| 6934 | " float16_t __s0 = __p0; \\\n" |
| 6935 | " float16_t __ret; \\\n" |
| 6936 | " __ret = (float16_t) __builtin_neon_vrsqrteh_f16(__s0); \\\n" |
| 6937 | " __ret; \\\n" |
| 6938 | "})\n" |
| 6939 | "#else\n" |
| 6940 | "#define vrsqrteh_f16(__p0) __extension__ ({ \\\n" |
| 6941 | " float16_t __s0 = __p0; \\\n" |
| 6942 | " float16_t __ret; \\\n" |
| 6943 | " __ret = (float16_t) __builtin_neon_vrsqrteh_f16(__s0); \\\n" |
| 6944 | " __ret; \\\n" |
| 6945 | "})\n" |
| 6946 | "#endif\n" |
| 6947 | "\n" |
| 6948 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6949 | "#define vrsqrtsh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 6950 | " float16_t __s0 = __p0; \\\n" |
| 6951 | " float16_t __s1 = __p1; \\\n" |
| 6952 | " float16_t __ret; \\\n" |
| 6953 | " __ret = (float16_t) __builtin_neon_vrsqrtsh_f16(__s0, __s1); \\\n" |
| 6954 | " __ret; \\\n" |
| 6955 | "})\n" |
| 6956 | "#else\n" |
| 6957 | "#define vrsqrtsh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 6958 | " float16_t __s0 = __p0; \\\n" |
| 6959 | " float16_t __s1 = __p1; \\\n" |
| 6960 | " float16_t __ret; \\\n" |
| 6961 | " __ret = (float16_t) __builtin_neon_vrsqrtsh_f16(__s0, __s1); \\\n" |
| 6962 | " __ret; \\\n" |
| 6963 | "})\n" |
| 6964 | "#endif\n" |
| 6965 | "\n" |
| 6966 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6967 | "#define vsqrth_f16(__p0) __extension__ ({ \\\n" |
| 6968 | " float16_t __s0 = __p0; \\\n" |
| 6969 | " float16_t __ret; \\\n" |
| 6970 | " __ret = (float16_t) __builtin_neon_vsqrth_f16(__s0); \\\n" |
| 6971 | " __ret; \\\n" |
| 6972 | "})\n" |
| 6973 | "#else\n" |
| 6974 | "#define vsqrth_f16(__p0) __extension__ ({ \\\n" |
| 6975 | " float16_t __s0 = __p0; \\\n" |
| 6976 | " float16_t __ret; \\\n" |
| 6977 | " __ret = (float16_t) __builtin_neon_vsqrth_f16(__s0); \\\n" |
| 6978 | " __ret; \\\n" |
| 6979 | "})\n" |
| 6980 | "#endif\n" |
| 6981 | "\n" |
| 6982 | "#ifdef __LITTLE_ENDIAN__\n" |
| 6983 | "#define vsubh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 6984 | " float16_t __s0 = __p0; \\\n" |
| 6985 | " float16_t __s1 = __p1; \\\n" |
| 6986 | " float16_t __ret; \\\n" |
| 6987 | " __ret = (float16_t) __builtin_neon_vsubh_f16(__s0, __s1); \\\n" |
| 6988 | " __ret; \\\n" |
| 6989 | "})\n" |
| 6990 | "#else\n" |
| 6991 | "#define vsubh_f16(__p0, __p1) __extension__ ({ \\\n" |
| 6992 | " float16_t __s0 = __p0; \\\n" |
| 6993 | " float16_t __s1 = __p1; \\\n" |
| 6994 | " float16_t __ret; \\\n" |
| 6995 | " __ret = (float16_t) __builtin_neon_vsubh_f16(__s0, __s1); \\\n" |
| 6996 | " __ret; \\\n" |
| 6997 | "})\n" |
| 6998 | "#endif\n" |
| 6999 | "\n" |
| 7000 | "#endif\n" |
| 7001 | "\n" |
| 7002 | "#undef __ai\n" |
| 7003 | "\n" |
| 7004 | "#endif /* __ARM_FP16_H */\n" |
| 7005 | "" } , |
| 7006 | { "/builtins/armintr.h" , "/*===---- armintr.h - ARM Windows intrinsics -------------------------------===\n" |
| 7007 | " *\n" |
| 7008 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 7009 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 7010 | " * in the Software without restriction, including without limitation the rights\n" |
| 7011 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 7012 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 7013 | " * furnished to do so, subject to the following conditions:\n" |
| 7014 | " *\n" |
| 7015 | " * The above copyright notice and this permission notice shall be included in\n" |
| 7016 | " * all copies or substantial portions of the Software.\n" |
| 7017 | " *\n" |
| 7018 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 7019 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 7020 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 7021 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 7022 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 7023 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 7024 | " * THE SOFTWARE.\n" |
| 7025 | " *\n" |
| 7026 | " *===-----------------------------------------------------------------------===\n" |
| 7027 | " */\n" |
| 7028 | "\n" |
| 7029 | "/* Only include this if we're compiling for the windows platform. */\n" |
| 7030 | "#ifndef _MSC_VER\n" |
| 7031 | "#include_next <armintr.h>\n" |
| 7032 | "#else\n" |
| 7033 | "\n" |
| 7034 | "#ifndef __ARMINTR_H\n" |
| 7035 | "#define __ARMINTR_H\n" |
| 7036 | "\n" |
| 7037 | "typedef enum\n" |
| 7038 | "{\n" |
| 7039 | " _ARM_BARRIER_SY = 0xF,\n" |
| 7040 | " _ARM_BARRIER_ST = 0xE,\n" |
| 7041 | " _ARM_BARRIER_ISH = 0xB,\n" |
| 7042 | " _ARM_BARRIER_ISHST = 0xA,\n" |
| 7043 | " _ARM_BARRIER_NSH = 0x7,\n" |
| 7044 | " _ARM_BARRIER_NSHST = 0x6,\n" |
| 7045 | " _ARM_BARRIER_OSH = 0x3,\n" |
| 7046 | " _ARM_BARRIER_OSHST = 0x2\n" |
| 7047 | "} _ARMINTR_BARRIER_TYPE;\n" |
| 7048 | "\n" |
| 7049 | "#endif /* __ARMINTR_H */\n" |
| 7050 | "#endif /* _MSC_VER */\n" |
| 7051 | "" } , |
| 7052 | { "/builtins/avx2intrin.h" , "/*===---- avx2intrin.h - AVX2 intrinsics -----------------------------------===\n" |
| 7053 | " *\n" |
| 7054 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 7055 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 7056 | " * in the Software without restriction, including without limitation the rights\n" |
| 7057 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 7058 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 7059 | " * furnished to do so, subject to the following conditions:\n" |
| 7060 | " *\n" |
| 7061 | " * The above copyright notice and this permission notice shall be included in\n" |
| 7062 | " * all copies or substantial portions of the Software.\n" |
| 7063 | " *\n" |
| 7064 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 7065 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 7066 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 7067 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 7068 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 7069 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 7070 | " * THE SOFTWARE.\n" |
| 7071 | " *\n" |
| 7072 | " *===-----------------------------------------------------------------------===\n" |
| 7073 | " */\n" |
| 7074 | "\n" |
| 7075 | "#ifndef __IMMINTRIN_H\n" |
| 7076 | "#error \"Never use <avx2intrin.h> directly; include <immintrin.h> instead.\"\n" |
| 7077 | "#endif\n" |
| 7078 | "\n" |
| 7079 | "#ifndef __AVX2INTRIN_H\n" |
| 7080 | "#define __AVX2INTRIN_H\n" |
| 7081 | "\n" |
| 7082 | "/* Define the default attributes for the functions in this file. */\n" |
| 7083 | "#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__(\"avx2\"), __min_vector_width__(256)))\n" |
| 7084 | "#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__(\"avx2\"), __min_vector_width__(128)))\n" |
| 7085 | "\n" |
| 7086 | "/* SSE4 Multiple Packed Sums of Absolute Difference. */\n" |
| 7087 | "#define _mm256_mpsadbw_epu8(X, Y, M) \\\n" |
| 7088 | " (__m256i)__builtin_ia32_mpsadbw256((__v32qi)(__m256i)(X), \\\n" |
| 7089 | " (__v32qi)(__m256i)(Y), (int)(M))\n" |
| 7090 | "\n" |
| 7091 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7092 | "_mm256_abs_epi8(__m256i __a)\n" |
| 7093 | "{\n" |
| 7094 | " return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a);\n" |
| 7095 | "}\n" |
| 7096 | "\n" |
| 7097 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7098 | "_mm256_abs_epi16(__m256i __a)\n" |
| 7099 | "{\n" |
| 7100 | " return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a);\n" |
| 7101 | "}\n" |
| 7102 | "\n" |
| 7103 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7104 | "_mm256_abs_epi32(__m256i __a)\n" |
| 7105 | "{\n" |
| 7106 | " return (__m256i)__builtin_ia32_pabsd256((__v8si)__a);\n" |
| 7107 | "}\n" |
| 7108 | "\n" |
| 7109 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7110 | "_mm256_packs_epi16(__m256i __a, __m256i __b)\n" |
| 7111 | "{\n" |
| 7112 | " return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b);\n" |
| 7113 | "}\n" |
| 7114 | "\n" |
| 7115 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7116 | "_mm256_packs_epi32(__m256i __a, __m256i __b)\n" |
| 7117 | "{\n" |
| 7118 | " return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b);\n" |
| 7119 | "}\n" |
| 7120 | "\n" |
| 7121 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7122 | "_mm256_packus_epi16(__m256i __a, __m256i __b)\n" |
| 7123 | "{\n" |
| 7124 | " return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b);\n" |
| 7125 | "}\n" |
| 7126 | "\n" |
| 7127 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7128 | "_mm256_packus_epi32(__m256i __V1, __m256i __V2)\n" |
| 7129 | "{\n" |
| 7130 | " return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2);\n" |
| 7131 | "}\n" |
| 7132 | "\n" |
| 7133 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7134 | "_mm256_add_epi8(__m256i __a, __m256i __b)\n" |
| 7135 | "{\n" |
| 7136 | " return (__m256i)((__v32qu)__a + (__v32qu)__b);\n" |
| 7137 | "}\n" |
| 7138 | "\n" |
| 7139 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7140 | "_mm256_add_epi16(__m256i __a, __m256i __b)\n" |
| 7141 | "{\n" |
| 7142 | " return (__m256i)((__v16hu)__a + (__v16hu)__b);\n" |
| 7143 | "}\n" |
| 7144 | "\n" |
| 7145 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7146 | "_mm256_add_epi32(__m256i __a, __m256i __b)\n" |
| 7147 | "{\n" |
| 7148 | " return (__m256i)((__v8su)__a + (__v8su)__b);\n" |
| 7149 | "}\n" |
| 7150 | "\n" |
| 7151 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7152 | "_mm256_add_epi64(__m256i __a, __m256i __b)\n" |
| 7153 | "{\n" |
| 7154 | " return (__m256i)((__v4du)__a + (__v4du)__b);\n" |
| 7155 | "}\n" |
| 7156 | "\n" |
| 7157 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7158 | "_mm256_adds_epi8(__m256i __a, __m256i __b)\n" |
| 7159 | "{\n" |
| 7160 | " return (__m256i)__builtin_ia32_paddsb256((__v32qi)__a, (__v32qi)__b);\n" |
| 7161 | "}\n" |
| 7162 | "\n" |
| 7163 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7164 | "_mm256_adds_epi16(__m256i __a, __m256i __b)\n" |
| 7165 | "{\n" |
| 7166 | " return (__m256i)__builtin_ia32_paddsw256((__v16hi)__a, (__v16hi)__b);\n" |
| 7167 | "}\n" |
| 7168 | "\n" |
| 7169 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7170 | "_mm256_adds_epu8(__m256i __a, __m256i __b)\n" |
| 7171 | "{\n" |
| 7172 | " return (__m256i)__builtin_ia32_paddusb256((__v32qi)__a, (__v32qi)__b);\n" |
| 7173 | "}\n" |
| 7174 | "\n" |
| 7175 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7176 | "_mm256_adds_epu16(__m256i __a, __m256i __b)\n" |
| 7177 | "{\n" |
| 7178 | " return (__m256i)__builtin_ia32_paddusw256((__v16hi)__a, (__v16hi)__b);\n" |
| 7179 | "}\n" |
| 7180 | "\n" |
| 7181 | "#define _mm256_alignr_epi8(a, b, n) \\\n" |
| 7182 | " (__m256i)__builtin_ia32_palignr256((__v32qi)(__m256i)(a), \\\n" |
| 7183 | " (__v32qi)(__m256i)(b), (n))\n" |
| 7184 | "\n" |
| 7185 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7186 | "_mm256_and_si256(__m256i __a, __m256i __b)\n" |
| 7187 | "{\n" |
| 7188 | " return (__m256i)((__v4du)__a & (__v4du)__b);\n" |
| 7189 | "}\n" |
| 7190 | "\n" |
| 7191 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7192 | "_mm256_andnot_si256(__m256i __a, __m256i __b)\n" |
| 7193 | "{\n" |
| 7194 | " return (__m256i)(~(__v4du)__a & (__v4du)__b);\n" |
| 7195 | "}\n" |
| 7196 | "\n" |
| 7197 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7198 | "_mm256_avg_epu8(__m256i __a, __m256i __b)\n" |
| 7199 | "{\n" |
| 7200 | " typedef unsigned short __v32hu __attribute__((__vector_size__(64)));\n" |
| 7201 | " return (__m256i)__builtin_convertvector(\n" |
| 7202 | " ((__builtin_convertvector((__v32qu)__a, __v32hu) +\n" |
| 7203 | " __builtin_convertvector((__v32qu)__b, __v32hu)) + 1)\n" |
| 7204 | " >> 1, __v32qu);\n" |
| 7205 | "}\n" |
| 7206 | "\n" |
| 7207 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7208 | "_mm256_avg_epu16(__m256i __a, __m256i __b)\n" |
| 7209 | "{\n" |
| 7210 | " typedef unsigned int __v16su __attribute__((__vector_size__(64)));\n" |
| 7211 | " return (__m256i)__builtin_convertvector(\n" |
| 7212 | " ((__builtin_convertvector((__v16hu)__a, __v16su) +\n" |
| 7213 | " __builtin_convertvector((__v16hu)__b, __v16su)) + 1)\n" |
| 7214 | " >> 1, __v16hu);\n" |
| 7215 | "}\n" |
| 7216 | "\n" |
| 7217 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7218 | "_mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)\n" |
| 7219 | "{\n" |
| 7220 | " return (__m256i)__builtin_ia32_pblendvb256((__v32qi)__V1, (__v32qi)__V2,\n" |
| 7221 | " (__v32qi)__M);\n" |
| 7222 | "}\n" |
| 7223 | "\n" |
| 7224 | "#define _mm256_blend_epi16(V1, V2, M) \\\n" |
| 7225 | " (__m256i)__builtin_ia32_pblendw256((__v16hi)(__m256i)(V1), \\\n" |
| 7226 | " (__v16hi)(__m256i)(V2), (int)(M))\n" |
| 7227 | "\n" |
| 7228 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7229 | "_mm256_cmpeq_epi8(__m256i __a, __m256i __b)\n" |
| 7230 | "{\n" |
| 7231 | " return (__m256i)((__v32qi)__a == (__v32qi)__b);\n" |
| 7232 | "}\n" |
| 7233 | "\n" |
| 7234 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7235 | "_mm256_cmpeq_epi16(__m256i __a, __m256i __b)\n" |
| 7236 | "{\n" |
| 7237 | " return (__m256i)((__v16hi)__a == (__v16hi)__b);\n" |
| 7238 | "}\n" |
| 7239 | "\n" |
| 7240 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7241 | "_mm256_cmpeq_epi32(__m256i __a, __m256i __b)\n" |
| 7242 | "{\n" |
| 7243 | " return (__m256i)((__v8si)__a == (__v8si)__b);\n" |
| 7244 | "}\n" |
| 7245 | "\n" |
| 7246 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7247 | "_mm256_cmpeq_epi64(__m256i __a, __m256i __b)\n" |
| 7248 | "{\n" |
| 7249 | " return (__m256i)((__v4di)__a == (__v4di)__b);\n" |
| 7250 | "}\n" |
| 7251 | "\n" |
| 7252 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7253 | "_mm256_cmpgt_epi8(__m256i __a, __m256i __b)\n" |
| 7254 | "{\n" |
| 7255 | " /* This function always performs a signed comparison, but __v32qi is a char\n" |
| 7256 | " which may be signed or unsigned, so use __v32qs. */\n" |
| 7257 | " return (__m256i)((__v32qs)__a > (__v32qs)__b);\n" |
| 7258 | "}\n" |
| 7259 | "\n" |
| 7260 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7261 | "_mm256_cmpgt_epi16(__m256i __a, __m256i __b)\n" |
| 7262 | "{\n" |
| 7263 | " return (__m256i)((__v16hi)__a > (__v16hi)__b);\n" |
| 7264 | "}\n" |
| 7265 | "\n" |
| 7266 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7267 | "_mm256_cmpgt_epi32(__m256i __a, __m256i __b)\n" |
| 7268 | "{\n" |
| 7269 | " return (__m256i)((__v8si)__a > (__v8si)__b);\n" |
| 7270 | "}\n" |
| 7271 | "\n" |
| 7272 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7273 | "_mm256_cmpgt_epi64(__m256i __a, __m256i __b)\n" |
| 7274 | "{\n" |
| 7275 | " return (__m256i)((__v4di)__a > (__v4di)__b);\n" |
| 7276 | "}\n" |
| 7277 | "\n" |
| 7278 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7279 | "_mm256_hadd_epi16(__m256i __a, __m256i __b)\n" |
| 7280 | "{\n" |
| 7281 | " return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b);\n" |
| 7282 | "}\n" |
| 7283 | "\n" |
| 7284 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7285 | "_mm256_hadd_epi32(__m256i __a, __m256i __b)\n" |
| 7286 | "{\n" |
| 7287 | " return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b);\n" |
| 7288 | "}\n" |
| 7289 | "\n" |
| 7290 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7291 | "_mm256_hadds_epi16(__m256i __a, __m256i __b)\n" |
| 7292 | "{\n" |
| 7293 | " return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b);\n" |
| 7294 | "}\n" |
| 7295 | "\n" |
| 7296 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7297 | "_mm256_hsub_epi16(__m256i __a, __m256i __b)\n" |
| 7298 | "{\n" |
| 7299 | " return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b);\n" |
| 7300 | "}\n" |
| 7301 | "\n" |
| 7302 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7303 | "_mm256_hsub_epi32(__m256i __a, __m256i __b)\n" |
| 7304 | "{\n" |
| 7305 | " return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b);\n" |
| 7306 | "}\n" |
| 7307 | "\n" |
| 7308 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7309 | "_mm256_hsubs_epi16(__m256i __a, __m256i __b)\n" |
| 7310 | "{\n" |
| 7311 | " return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b);\n" |
| 7312 | "}\n" |
| 7313 | "\n" |
| 7314 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7315 | "_mm256_maddubs_epi16(__m256i __a, __m256i __b)\n" |
| 7316 | "{\n" |
| 7317 | " return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b);\n" |
| 7318 | "}\n" |
| 7319 | "\n" |
| 7320 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7321 | "_mm256_madd_epi16(__m256i __a, __m256i __b)\n" |
| 7322 | "{\n" |
| 7323 | " return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)__a, (__v16hi)__b);\n" |
| 7324 | "}\n" |
| 7325 | "\n" |
| 7326 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7327 | "_mm256_max_epi8(__m256i __a, __m256i __b)\n" |
| 7328 | "{\n" |
| 7329 | " return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)__a, (__v32qi)__b);\n" |
| 7330 | "}\n" |
| 7331 | "\n" |
| 7332 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7333 | "_mm256_max_epi16(__m256i __a, __m256i __b)\n" |
| 7334 | "{\n" |
| 7335 | " return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)__a, (__v16hi)__b);\n" |
| 7336 | "}\n" |
| 7337 | "\n" |
| 7338 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7339 | "_mm256_max_epi32(__m256i __a, __m256i __b)\n" |
| 7340 | "{\n" |
| 7341 | " return (__m256i)__builtin_ia32_pmaxsd256((__v8si)__a, (__v8si)__b);\n" |
| 7342 | "}\n" |
| 7343 | "\n" |
| 7344 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7345 | "_mm256_max_epu8(__m256i __a, __m256i __b)\n" |
| 7346 | "{\n" |
| 7347 | " return (__m256i)__builtin_ia32_pmaxub256((__v32qi)__a, (__v32qi)__b);\n" |
| 7348 | "}\n" |
| 7349 | "\n" |
| 7350 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7351 | "_mm256_max_epu16(__m256i __a, __m256i __b)\n" |
| 7352 | "{\n" |
| 7353 | " return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)__a, (__v16hi)__b);\n" |
| 7354 | "}\n" |
| 7355 | "\n" |
| 7356 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7357 | "_mm256_max_epu32(__m256i __a, __m256i __b)\n" |
| 7358 | "{\n" |
| 7359 | " return (__m256i)__builtin_ia32_pmaxud256((__v8si)__a, (__v8si)__b);\n" |
| 7360 | "}\n" |
| 7361 | "\n" |
| 7362 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7363 | "_mm256_min_epi8(__m256i __a, __m256i __b)\n" |
| 7364 | "{\n" |
| 7365 | " return (__m256i)__builtin_ia32_pminsb256((__v32qi)__a, (__v32qi)__b);\n" |
| 7366 | "}\n" |
| 7367 | "\n" |
| 7368 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7369 | "_mm256_min_epi16(__m256i __a, __m256i __b)\n" |
| 7370 | "{\n" |
| 7371 | " return (__m256i)__builtin_ia32_pminsw256((__v16hi)__a, (__v16hi)__b);\n" |
| 7372 | "}\n" |
| 7373 | "\n" |
| 7374 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7375 | "_mm256_min_epi32(__m256i __a, __m256i __b)\n" |
| 7376 | "{\n" |
| 7377 | " return (__m256i)__builtin_ia32_pminsd256((__v8si)__a, (__v8si)__b);\n" |
| 7378 | "}\n" |
| 7379 | "\n" |
| 7380 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7381 | "_mm256_min_epu8(__m256i __a, __m256i __b)\n" |
| 7382 | "{\n" |
| 7383 | " return (__m256i)__builtin_ia32_pminub256((__v32qi)__a, (__v32qi)__b);\n" |
| 7384 | "}\n" |
| 7385 | "\n" |
| 7386 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7387 | "_mm256_min_epu16(__m256i __a, __m256i __b)\n" |
| 7388 | "{\n" |
| 7389 | " return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__a, (__v16hi)__b);\n" |
| 7390 | "}\n" |
| 7391 | "\n" |
| 7392 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7393 | "_mm256_min_epu32(__m256i __a, __m256i __b)\n" |
| 7394 | "{\n" |
| 7395 | " return (__m256i)__builtin_ia32_pminud256((__v8si)__a, (__v8si)__b);\n" |
| 7396 | "}\n" |
| 7397 | "\n" |
| 7398 | "static __inline__ int __DEFAULT_FN_ATTRS256\n" |
| 7399 | "_mm256_movemask_epi8(__m256i __a)\n" |
| 7400 | "{\n" |
| 7401 | " return __builtin_ia32_pmovmskb256((__v32qi)__a);\n" |
| 7402 | "}\n" |
| 7403 | "\n" |
| 7404 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7405 | "_mm256_cvtepi8_epi16(__m128i __V)\n" |
| 7406 | "{\n" |
| 7407 | " /* This function always performs a signed extension, but __v16qi is a char\n" |
| 7408 | " which may be signed or unsigned, so use __v16qs. */\n" |
| 7409 | " return (__m256i)__builtin_convertvector((__v16qs)__V, __v16hi);\n" |
| 7410 | "}\n" |
| 7411 | "\n" |
| 7412 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7413 | "_mm256_cvtepi8_epi32(__m128i __V)\n" |
| 7414 | "{\n" |
| 7415 | " /* This function always performs a signed extension, but __v16qi is a char\n" |
| 7416 | " which may be signed or unsigned, so use __v16qs. */\n" |
| 7417 | " return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si);\n" |
| 7418 | "}\n" |
| 7419 | "\n" |
| 7420 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7421 | "_mm256_cvtepi8_epi64(__m128i __V)\n" |
| 7422 | "{\n" |
| 7423 | " /* This function always performs a signed extension, but __v16qi is a char\n" |
| 7424 | " which may be signed or unsigned, so use __v16qs. */\n" |
| 7425 | " return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4di);\n" |
| 7426 | "}\n" |
| 7427 | "\n" |
| 7428 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7429 | "_mm256_cvtepi16_epi32(__m128i __V)\n" |
| 7430 | "{\n" |
| 7431 | " return (__m256i)__builtin_convertvector((__v8hi)__V, __v8si);\n" |
| 7432 | "}\n" |
| 7433 | "\n" |
| 7434 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7435 | "_mm256_cvtepi16_epi64(__m128i __V)\n" |
| 7436 | "{\n" |
| 7437 | " return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4di);\n" |
| 7438 | "}\n" |
| 7439 | "\n" |
| 7440 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7441 | "_mm256_cvtepi32_epi64(__m128i __V)\n" |
| 7442 | "{\n" |
| 7443 | " return (__m256i)__builtin_convertvector((__v4si)__V, __v4di);\n" |
| 7444 | "}\n" |
| 7445 | "\n" |
| 7446 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7447 | "_mm256_cvtepu8_epi16(__m128i __V)\n" |
| 7448 | "{\n" |
| 7449 | " return (__m256i)__builtin_convertvector((__v16qu)__V, __v16hi);\n" |
| 7450 | "}\n" |
| 7451 | "\n" |
| 7452 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7453 | "_mm256_cvtepu8_epi32(__m128i __V)\n" |
| 7454 | "{\n" |
| 7455 | " return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si);\n" |
| 7456 | "}\n" |
| 7457 | "\n" |
| 7458 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7459 | "_mm256_cvtepu8_epi64(__m128i __V)\n" |
| 7460 | "{\n" |
| 7461 | " return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4di);\n" |
| 7462 | "}\n" |
| 7463 | "\n" |
| 7464 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7465 | "_mm256_cvtepu16_epi32(__m128i __V)\n" |
| 7466 | "{\n" |
| 7467 | " return (__m256i)__builtin_convertvector((__v8hu)__V, __v8si);\n" |
| 7468 | "}\n" |
| 7469 | "\n" |
| 7470 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7471 | "_mm256_cvtepu16_epi64(__m128i __V)\n" |
| 7472 | "{\n" |
| 7473 | " return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4di);\n" |
| 7474 | "}\n" |
| 7475 | "\n" |
| 7476 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7477 | "_mm256_cvtepu32_epi64(__m128i __V)\n" |
| 7478 | "{\n" |
| 7479 | " return (__m256i)__builtin_convertvector((__v4su)__V, __v4di);\n" |
| 7480 | "}\n" |
| 7481 | "\n" |
| 7482 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7483 | "_mm256_mul_epi32(__m256i __a, __m256i __b)\n" |
| 7484 | "{\n" |
| 7485 | " return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b);\n" |
| 7486 | "}\n" |
| 7487 | "\n" |
| 7488 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7489 | "_mm256_mulhrs_epi16(__m256i __a, __m256i __b)\n" |
| 7490 | "{\n" |
| 7491 | " return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b);\n" |
| 7492 | "}\n" |
| 7493 | "\n" |
| 7494 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7495 | "_mm256_mulhi_epu16(__m256i __a, __m256i __b)\n" |
| 7496 | "{\n" |
| 7497 | " return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)__a, (__v16hi)__b);\n" |
| 7498 | "}\n" |
| 7499 | "\n" |
| 7500 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7501 | "_mm256_mulhi_epi16(__m256i __a, __m256i __b)\n" |
| 7502 | "{\n" |
| 7503 | " return (__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b);\n" |
| 7504 | "}\n" |
| 7505 | "\n" |
| 7506 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7507 | "_mm256_mullo_epi16(__m256i __a, __m256i __b)\n" |
| 7508 | "{\n" |
| 7509 | " return (__m256i)((__v16hu)__a * (__v16hu)__b);\n" |
| 7510 | "}\n" |
| 7511 | "\n" |
| 7512 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7513 | "_mm256_mullo_epi32 (__m256i __a, __m256i __b)\n" |
| 7514 | "{\n" |
| 7515 | " return (__m256i)((__v8su)__a * (__v8su)__b);\n" |
| 7516 | "}\n" |
| 7517 | "\n" |
| 7518 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7519 | "_mm256_mul_epu32(__m256i __a, __m256i __b)\n" |
| 7520 | "{\n" |
| 7521 | " return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b);\n" |
| 7522 | "}\n" |
| 7523 | "\n" |
| 7524 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7525 | "_mm256_or_si256(__m256i __a, __m256i __b)\n" |
| 7526 | "{\n" |
| 7527 | " return (__m256i)((__v4du)__a | (__v4du)__b);\n" |
| 7528 | "}\n" |
| 7529 | "\n" |
| 7530 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7531 | "_mm256_sad_epu8(__m256i __a, __m256i __b)\n" |
| 7532 | "{\n" |
| 7533 | " return __builtin_ia32_psadbw256((__v32qi)__a, (__v32qi)__b);\n" |
| 7534 | "}\n" |
| 7535 | "\n" |
| 7536 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7537 | "_mm256_shuffle_epi8(__m256i __a, __m256i __b)\n" |
| 7538 | "{\n" |
| 7539 | " return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b);\n" |
| 7540 | "}\n" |
| 7541 | "\n" |
| 7542 | "#define _mm256_shuffle_epi32(a, imm) \\\n" |
| 7543 | " (__m256i)__builtin_ia32_pshufd256((__v8si)(__m256i)(a), (int)(imm))\n" |
| 7544 | "\n" |
| 7545 | "#define _mm256_shufflehi_epi16(a, imm) \\\n" |
| 7546 | " (__m256i)__builtin_ia32_pshufhw256((__v16hi)(__m256i)(a), (int)(imm))\n" |
| 7547 | "\n" |
| 7548 | "#define _mm256_shufflelo_epi16(a, imm) \\\n" |
| 7549 | " (__m256i)__builtin_ia32_pshuflw256((__v16hi)(__m256i)(a), (int)(imm))\n" |
| 7550 | "\n" |
| 7551 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7552 | "_mm256_sign_epi8(__m256i __a, __m256i __b)\n" |
| 7553 | "{\n" |
| 7554 | " return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b);\n" |
| 7555 | "}\n" |
| 7556 | "\n" |
| 7557 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7558 | "_mm256_sign_epi16(__m256i __a, __m256i __b)\n" |
| 7559 | "{\n" |
| 7560 | " return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b);\n" |
| 7561 | "}\n" |
| 7562 | "\n" |
| 7563 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7564 | "_mm256_sign_epi32(__m256i __a, __m256i __b)\n" |
| 7565 | "{\n" |
| 7566 | " return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b);\n" |
| 7567 | "}\n" |
| 7568 | "\n" |
| 7569 | "#define _mm256_slli_si256(a, imm) \\\n" |
| 7570 | " (__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm))\n" |
| 7571 | "\n" |
| 7572 | "#define _mm256_bslli_epi128(a, imm) \\\n" |
| 7573 | " (__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm))\n" |
| 7574 | "\n" |
| 7575 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7576 | "_mm256_slli_epi16(__m256i __a, int __count)\n" |
| 7577 | "{\n" |
| 7578 | " return (__m256i)__builtin_ia32_psllwi256((__v16hi)__a, __count);\n" |
| 7579 | "}\n" |
| 7580 | "\n" |
| 7581 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7582 | "_mm256_sll_epi16(__m256i __a, __m128i __count)\n" |
| 7583 | "{\n" |
| 7584 | " return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count);\n" |
| 7585 | "}\n" |
| 7586 | "\n" |
| 7587 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7588 | "_mm256_slli_epi32(__m256i __a, int __count)\n" |
| 7589 | "{\n" |
| 7590 | " return (__m256i)__builtin_ia32_pslldi256((__v8si)__a, __count);\n" |
| 7591 | "}\n" |
| 7592 | "\n" |
| 7593 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7594 | "_mm256_sll_epi32(__m256i __a, __m128i __count)\n" |
| 7595 | "{\n" |
| 7596 | " return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count);\n" |
| 7597 | "}\n" |
| 7598 | "\n" |
| 7599 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7600 | "_mm256_slli_epi64(__m256i __a, int __count)\n" |
| 7601 | "{\n" |
| 7602 | " return __builtin_ia32_psllqi256((__v4di)__a, __count);\n" |
| 7603 | "}\n" |
| 7604 | "\n" |
| 7605 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7606 | "_mm256_sll_epi64(__m256i __a, __m128i __count)\n" |
| 7607 | "{\n" |
| 7608 | " return __builtin_ia32_psllq256((__v4di)__a, __count);\n" |
| 7609 | "}\n" |
| 7610 | "\n" |
| 7611 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7612 | "_mm256_srai_epi16(__m256i __a, int __count)\n" |
| 7613 | "{\n" |
| 7614 | " return (__m256i)__builtin_ia32_psrawi256((__v16hi)__a, __count);\n" |
| 7615 | "}\n" |
| 7616 | "\n" |
| 7617 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7618 | "_mm256_sra_epi16(__m256i __a, __m128i __count)\n" |
| 7619 | "{\n" |
| 7620 | " return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count);\n" |
| 7621 | "}\n" |
| 7622 | "\n" |
| 7623 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7624 | "_mm256_srai_epi32(__m256i __a, int __count)\n" |
| 7625 | "{\n" |
| 7626 | " return (__m256i)__builtin_ia32_psradi256((__v8si)__a, __count);\n" |
| 7627 | "}\n" |
| 7628 | "\n" |
| 7629 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7630 | "_mm256_sra_epi32(__m256i __a, __m128i __count)\n" |
| 7631 | "{\n" |
| 7632 | " return (__m256i)__builtin_ia32_psrad256((__v8si)__a, (__v4si)__count);\n" |
| 7633 | "}\n" |
| 7634 | "\n" |
| 7635 | "#define _mm256_srli_si256(a, imm) \\\n" |
| 7636 | " (__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm))\n" |
| 7637 | "\n" |
| 7638 | "#define _mm256_bsrli_epi128(a, imm) \\\n" |
| 7639 | " (__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm))\n" |
| 7640 | "\n" |
| 7641 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7642 | "_mm256_srli_epi16(__m256i __a, int __count)\n" |
| 7643 | "{\n" |
| 7644 | " return (__m256i)__builtin_ia32_psrlwi256((__v16hi)__a, __count);\n" |
| 7645 | "}\n" |
| 7646 | "\n" |
| 7647 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7648 | "_mm256_srl_epi16(__m256i __a, __m128i __count)\n" |
| 7649 | "{\n" |
| 7650 | " return (__m256i)__builtin_ia32_psrlw256((__v16hi)__a, (__v8hi)__count);\n" |
| 7651 | "}\n" |
| 7652 | "\n" |
| 7653 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7654 | "_mm256_srli_epi32(__m256i __a, int __count)\n" |
| 7655 | "{\n" |
| 7656 | " return (__m256i)__builtin_ia32_psrldi256((__v8si)__a, __count);\n" |
| 7657 | "}\n" |
| 7658 | "\n" |
| 7659 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7660 | "_mm256_srl_epi32(__m256i __a, __m128i __count)\n" |
| 7661 | "{\n" |
| 7662 | " return (__m256i)__builtin_ia32_psrld256((__v8si)__a, (__v4si)__count);\n" |
| 7663 | "}\n" |
| 7664 | "\n" |
| 7665 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7666 | "_mm256_srli_epi64(__m256i __a, int __count)\n" |
| 7667 | "{\n" |
| 7668 | " return __builtin_ia32_psrlqi256((__v4di)__a, __count);\n" |
| 7669 | "}\n" |
| 7670 | "\n" |
| 7671 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7672 | "_mm256_srl_epi64(__m256i __a, __m128i __count)\n" |
| 7673 | "{\n" |
| 7674 | " return __builtin_ia32_psrlq256((__v4di)__a, __count);\n" |
| 7675 | "}\n" |
| 7676 | "\n" |
| 7677 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7678 | "_mm256_sub_epi8(__m256i __a, __m256i __b)\n" |
| 7679 | "{\n" |
| 7680 | " return (__m256i)((__v32qu)__a - (__v32qu)__b);\n" |
| 7681 | "}\n" |
| 7682 | "\n" |
| 7683 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7684 | "_mm256_sub_epi16(__m256i __a, __m256i __b)\n" |
| 7685 | "{\n" |
| 7686 | " return (__m256i)((__v16hu)__a - (__v16hu)__b);\n" |
| 7687 | "}\n" |
| 7688 | "\n" |
| 7689 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7690 | "_mm256_sub_epi32(__m256i __a, __m256i __b)\n" |
| 7691 | "{\n" |
| 7692 | " return (__m256i)((__v8su)__a - (__v8su)__b);\n" |
| 7693 | "}\n" |
| 7694 | "\n" |
| 7695 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7696 | "_mm256_sub_epi64(__m256i __a, __m256i __b)\n" |
| 7697 | "{\n" |
| 7698 | " return (__m256i)((__v4du)__a - (__v4du)__b);\n" |
| 7699 | "}\n" |
| 7700 | "\n" |
| 7701 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7702 | "_mm256_subs_epi8(__m256i __a, __m256i __b)\n" |
| 7703 | "{\n" |
| 7704 | " return (__m256i)__builtin_ia32_psubsb256((__v32qi)__a, (__v32qi)__b);\n" |
| 7705 | "}\n" |
| 7706 | "\n" |
| 7707 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7708 | "_mm256_subs_epi16(__m256i __a, __m256i __b)\n" |
| 7709 | "{\n" |
| 7710 | " return (__m256i)__builtin_ia32_psubsw256((__v16hi)__a, (__v16hi)__b);\n" |
| 7711 | "}\n" |
| 7712 | "\n" |
| 7713 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7714 | "_mm256_subs_epu8(__m256i __a, __m256i __b)\n" |
| 7715 | "{\n" |
| 7716 | " return (__m256i)__builtin_ia32_psubusb256((__v32qi)__a, (__v32qi)__b);\n" |
| 7717 | "}\n" |
| 7718 | "\n" |
| 7719 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7720 | "_mm256_subs_epu16(__m256i __a, __m256i __b)\n" |
| 7721 | "{\n" |
| 7722 | " return (__m256i)__builtin_ia32_psubusw256((__v16hi)__a, (__v16hi)__b);\n" |
| 7723 | "}\n" |
| 7724 | "\n" |
| 7725 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7726 | "_mm256_unpackhi_epi8(__m256i __a, __m256i __b)\n" |
| 7727 | "{\n" |
| 7728 | " return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31);\n" |
| 7729 | "}\n" |
| 7730 | "\n" |
| 7731 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7732 | "_mm256_unpackhi_epi16(__m256i __a, __m256i __b)\n" |
| 7733 | "{\n" |
| 7734 | " return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);\n" |
| 7735 | "}\n" |
| 7736 | "\n" |
| 7737 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7738 | "_mm256_unpackhi_epi32(__m256i __a, __m256i __b)\n" |
| 7739 | "{\n" |
| 7740 | " return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7);\n" |
| 7741 | "}\n" |
| 7742 | "\n" |
| 7743 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7744 | "_mm256_unpackhi_epi64(__m256i __a, __m256i __b)\n" |
| 7745 | "{\n" |
| 7746 | " return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 1, 4+1, 3, 4+3);\n" |
| 7747 | "}\n" |
| 7748 | "\n" |
| 7749 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7750 | "_mm256_unpacklo_epi8(__m256i __a, __m256i __b)\n" |
| 7751 | "{\n" |
| 7752 | " return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23);\n" |
| 7753 | "}\n" |
| 7754 | "\n" |
| 7755 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7756 | "_mm256_unpacklo_epi16(__m256i __a, __m256i __b)\n" |
| 7757 | "{\n" |
| 7758 | " return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11);\n" |
| 7759 | "}\n" |
| 7760 | "\n" |
| 7761 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7762 | "_mm256_unpacklo_epi32(__m256i __a, __m256i __b)\n" |
| 7763 | "{\n" |
| 7764 | " return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5);\n" |
| 7765 | "}\n" |
| 7766 | "\n" |
| 7767 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7768 | "_mm256_unpacklo_epi64(__m256i __a, __m256i __b)\n" |
| 7769 | "{\n" |
| 7770 | " return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 0, 4+0, 2, 4+2);\n" |
| 7771 | "}\n" |
| 7772 | "\n" |
| 7773 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7774 | "_mm256_xor_si256(__m256i __a, __m256i __b)\n" |
| 7775 | "{\n" |
| 7776 | " return (__m256i)((__v4du)__a ^ (__v4du)__b);\n" |
| 7777 | "}\n" |
| 7778 | "\n" |
| 7779 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7780 | "_mm256_stream_load_si256(__m256i const *__V)\n" |
| 7781 | "{\n" |
| 7782 | " typedef __v4di __v4di_aligned __attribute__((aligned(32)));\n" |
| 7783 | " return (__m256i)__builtin_nontemporal_load((const __v4di_aligned *)__V);\n" |
| 7784 | "}\n" |
| 7785 | "\n" |
| 7786 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
| 7787 | "_mm_broadcastss_ps(__m128 __X)\n" |
| 7788 | "{\n" |
| 7789 | " return (__m128)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0);\n" |
| 7790 | "}\n" |
| 7791 | "\n" |
| 7792 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
| 7793 | "_mm_broadcastsd_pd(__m128d __a)\n" |
| 7794 | "{\n" |
| 7795 | " return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);\n" |
| 7796 | "}\n" |
| 7797 | "\n" |
| 7798 | "static __inline__ __m256 __DEFAULT_FN_ATTRS256\n" |
| 7799 | "_mm256_broadcastss_ps(__m128 __X)\n" |
| 7800 | "{\n" |
| 7801 | " return (__m256)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0, 0, 0, 0, 0);\n" |
| 7802 | "}\n" |
| 7803 | "\n" |
| 7804 | "static __inline__ __m256d __DEFAULT_FN_ATTRS256\n" |
| 7805 | "_mm256_broadcastsd_pd(__m128d __X)\n" |
| 7806 | "{\n" |
| 7807 | " return (__m256d)__builtin_shufflevector((__v2df)__X, (__v2df)__X, 0, 0, 0, 0);\n" |
| 7808 | "}\n" |
| 7809 | "\n" |
| 7810 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7811 | "_mm256_broadcastsi128_si256(__m128i __X)\n" |
| 7812 | "{\n" |
| 7813 | " return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 1, 0, 1);\n" |
| 7814 | "}\n" |
| 7815 | "\n" |
| 7816 | "#define _mm_blend_epi32(V1, V2, M) \\\n" |
| 7817 | " (__m128i)__builtin_ia32_pblendd128((__v4si)(__m128i)(V1), \\\n" |
| 7818 | " (__v4si)(__m128i)(V2), (int)(M))\n" |
| 7819 | "\n" |
| 7820 | "#define _mm256_blend_epi32(V1, V2, M) \\\n" |
| 7821 | " (__m256i)__builtin_ia32_pblendd256((__v8si)(__m256i)(V1), \\\n" |
| 7822 | " (__v8si)(__m256i)(V2), (int)(M))\n" |
| 7823 | "\n" |
| 7824 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7825 | "_mm256_broadcastb_epi8(__m128i __X)\n" |
| 7826 | "{\n" |
| 7827 | " return (__m256i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);\n" |
| 7828 | "}\n" |
| 7829 | "\n" |
| 7830 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7831 | "_mm256_broadcastw_epi16(__m128i __X)\n" |
| 7832 | "{\n" |
| 7833 | " return (__m256i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);\n" |
| 7834 | "}\n" |
| 7835 | "\n" |
| 7836 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7837 | "_mm256_broadcastd_epi32(__m128i __X)\n" |
| 7838 | "{\n" |
| 7839 | " return (__m256i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0, 0, 0, 0, 0);\n" |
| 7840 | "}\n" |
| 7841 | "\n" |
| 7842 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7843 | "_mm256_broadcastq_epi64(__m128i __X)\n" |
| 7844 | "{\n" |
| 7845 | " return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0, 0, 0);\n" |
| 7846 | "}\n" |
| 7847 | "\n" |
| 7848 | "static __inline__ __m128i __DEFAULT_FN_ATTRS128\n" |
| 7849 | "_mm_broadcastb_epi8(__m128i __X)\n" |
| 7850 | "{\n" |
| 7851 | " return (__m128i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);\n" |
| 7852 | "}\n" |
| 7853 | "\n" |
| 7854 | "static __inline__ __m128i __DEFAULT_FN_ATTRS128\n" |
| 7855 | "_mm_broadcastw_epi16(__m128i __X)\n" |
| 7856 | "{\n" |
| 7857 | " return (__m128i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0);\n" |
| 7858 | "}\n" |
| 7859 | "\n" |
| 7860 | "\n" |
| 7861 | "static __inline__ __m128i __DEFAULT_FN_ATTRS128\n" |
| 7862 | "_mm_broadcastd_epi32(__m128i __X)\n" |
| 7863 | "{\n" |
| 7864 | " return (__m128i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0);\n" |
| 7865 | "}\n" |
| 7866 | "\n" |
| 7867 | "static __inline__ __m128i __DEFAULT_FN_ATTRS128\n" |
| 7868 | "_mm_broadcastq_epi64(__m128i __X)\n" |
| 7869 | "{\n" |
| 7870 | " return (__m128i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0);\n" |
| 7871 | "}\n" |
| 7872 | "\n" |
| 7873 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7874 | "_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)\n" |
| 7875 | "{\n" |
| 7876 | " return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b);\n" |
| 7877 | "}\n" |
| 7878 | "\n" |
| 7879 | "#define _mm256_permute4x64_pd(V, M) \\\n" |
| 7880 | " (__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(V), (int)(M))\n" |
| 7881 | "\n" |
| 7882 | "static __inline__ __m256 __DEFAULT_FN_ATTRS256\n" |
| 7883 | "_mm256_permutevar8x32_ps(__m256 __a, __m256i __b)\n" |
| 7884 | "{\n" |
| 7885 | " return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8si)__b);\n" |
| 7886 | "}\n" |
| 7887 | "\n" |
| 7888 | "#define _mm256_permute4x64_epi64(V, M) \\\n" |
| 7889 | " (__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(V), (int)(M))\n" |
| 7890 | "\n" |
| 7891 | "#define _mm256_permute2x128_si256(V1, V2, M) \\\n" |
| 7892 | " (__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (int)(M))\n" |
| 7893 | "\n" |
| 7894 | "#define _mm256_extracti128_si256(V, M) \\\n" |
| 7895 | " (__m128i)__builtin_ia32_extract128i256((__v4di)(__m256i)(V), (int)(M))\n" |
| 7896 | "\n" |
| 7897 | "#define _mm256_inserti128_si256(V1, V2, M) \\\n" |
| 7898 | " (__m256i)__builtin_ia32_insert128i256((__v4di)(__m256i)(V1), \\\n" |
| 7899 | " (__v2di)(__m128i)(V2), (int)(M))\n" |
| 7900 | "\n" |
| 7901 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7902 | "_mm256_maskload_epi32(int const *__X, __m256i __M)\n" |
| 7903 | "{\n" |
| 7904 | " return (__m256i)__builtin_ia32_maskloadd256((const __v8si *)__X, (__v8si)__M);\n" |
| 7905 | "}\n" |
| 7906 | "\n" |
| 7907 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7908 | "_mm256_maskload_epi64(long long const *__X, __m256i __M)\n" |
| 7909 | "{\n" |
| 7910 | " return (__m256i)__builtin_ia32_maskloadq256((const __v4di *)__X, (__v4di)__M);\n" |
| 7911 | "}\n" |
| 7912 | "\n" |
| 7913 | "static __inline__ __m128i __DEFAULT_FN_ATTRS128\n" |
| 7914 | "_mm_maskload_epi32(int const *__X, __m128i __M)\n" |
| 7915 | "{\n" |
| 7916 | " return (__m128i)__builtin_ia32_maskloadd((const __v4si *)__X, (__v4si)__M);\n" |
| 7917 | "}\n" |
| 7918 | "\n" |
| 7919 | "static __inline__ __m128i __DEFAULT_FN_ATTRS128\n" |
| 7920 | "_mm_maskload_epi64(long long const *__X, __m128i __M)\n" |
| 7921 | "{\n" |
| 7922 | " return (__m128i)__builtin_ia32_maskloadq((const __v2di *)__X, (__v2di)__M);\n" |
| 7923 | "}\n" |
| 7924 | "\n" |
| 7925 | "static __inline__ void __DEFAULT_FN_ATTRS256\n" |
| 7926 | "_mm256_maskstore_epi32(int *__X, __m256i __M, __m256i __Y)\n" |
| 7927 | "{\n" |
| 7928 | " __builtin_ia32_maskstored256((__v8si *)__X, (__v8si)__M, (__v8si)__Y);\n" |
| 7929 | "}\n" |
| 7930 | "\n" |
| 7931 | "static __inline__ void __DEFAULT_FN_ATTRS256\n" |
| 7932 | "_mm256_maskstore_epi64(long long *__X, __m256i __M, __m256i __Y)\n" |
| 7933 | "{\n" |
| 7934 | " __builtin_ia32_maskstoreq256((__v4di *)__X, (__v4di)__M, (__v4di)__Y);\n" |
| 7935 | "}\n" |
| 7936 | "\n" |
| 7937 | "static __inline__ void __DEFAULT_FN_ATTRS128\n" |
| 7938 | "_mm_maskstore_epi32(int *__X, __m128i __M, __m128i __Y)\n" |
| 7939 | "{\n" |
| 7940 | " __builtin_ia32_maskstored((__v4si *)__X, (__v4si)__M, (__v4si)__Y);\n" |
| 7941 | "}\n" |
| 7942 | "\n" |
| 7943 | "static __inline__ void __DEFAULT_FN_ATTRS128\n" |
| 7944 | "_mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y)\n" |
| 7945 | "{\n" |
| 7946 | " __builtin_ia32_maskstoreq(( __v2di *)__X, (__v2di)__M, (__v2di)__Y);\n" |
| 7947 | "}\n" |
| 7948 | "\n" |
| 7949 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7950 | "_mm256_sllv_epi32(__m256i __X, __m256i __Y)\n" |
| 7951 | "{\n" |
| 7952 | " return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y);\n" |
| 7953 | "}\n" |
| 7954 | "\n" |
| 7955 | "static __inline__ __m128i __DEFAULT_FN_ATTRS128\n" |
| 7956 | "_mm_sllv_epi32(__m128i __X, __m128i __Y)\n" |
| 7957 | "{\n" |
| 7958 | " return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y);\n" |
| 7959 | "}\n" |
| 7960 | "\n" |
| 7961 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7962 | "_mm256_sllv_epi64(__m256i __X, __m256i __Y)\n" |
| 7963 | "{\n" |
| 7964 | " return (__m256i)__builtin_ia32_psllv4di((__v4di)__X, (__v4di)__Y);\n" |
| 7965 | "}\n" |
| 7966 | "\n" |
| 7967 | "static __inline__ __m128i __DEFAULT_FN_ATTRS128\n" |
| 7968 | "_mm_sllv_epi64(__m128i __X, __m128i __Y)\n" |
| 7969 | "{\n" |
| 7970 | " return (__m128i)__builtin_ia32_psllv2di((__v2di)__X, (__v2di)__Y);\n" |
| 7971 | "}\n" |
| 7972 | "\n" |
| 7973 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7974 | "_mm256_srav_epi32(__m256i __X, __m256i __Y)\n" |
| 7975 | "{\n" |
| 7976 | " return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y);\n" |
| 7977 | "}\n" |
| 7978 | "\n" |
| 7979 | "static __inline__ __m128i __DEFAULT_FN_ATTRS128\n" |
| 7980 | "_mm_srav_epi32(__m128i __X, __m128i __Y)\n" |
| 7981 | "{\n" |
| 7982 | " return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y);\n" |
| 7983 | "}\n" |
| 7984 | "\n" |
| 7985 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7986 | "_mm256_srlv_epi32(__m256i __X, __m256i __Y)\n" |
| 7987 | "{\n" |
| 7988 | " return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y);\n" |
| 7989 | "}\n" |
| 7990 | "\n" |
| 7991 | "static __inline__ __m128i __DEFAULT_FN_ATTRS128\n" |
| 7992 | "_mm_srlv_epi32(__m128i __X, __m128i __Y)\n" |
| 7993 | "{\n" |
| 7994 | " return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y);\n" |
| 7995 | "}\n" |
| 7996 | "\n" |
| 7997 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 7998 | "_mm256_srlv_epi64(__m256i __X, __m256i __Y)\n" |
| 7999 | "{\n" |
| 8000 | " return (__m256i)__builtin_ia32_psrlv4di((__v4di)__X, (__v4di)__Y);\n" |
| 8001 | "}\n" |
| 8002 | "\n" |
| 8003 | "static __inline__ __m128i __DEFAULT_FN_ATTRS128\n" |
| 8004 | "_mm_srlv_epi64(__m128i __X, __m128i __Y)\n" |
| 8005 | "{\n" |
| 8006 | " return (__m128i)__builtin_ia32_psrlv2di((__v2di)__X, (__v2di)__Y);\n" |
| 8007 | "}\n" |
| 8008 | "\n" |
| 8009 | "#define _mm_mask_i32gather_pd(a, m, i, mask, s) \\\n" |
| 8010 | " (__m128d)__builtin_ia32_gatherd_pd((__v2df)(__m128i)(a), \\\n" |
| 8011 | " (double const *)(m), \\\n" |
| 8012 | " (__v4si)(__m128i)(i), \\\n" |
| 8013 | " (__v2df)(__m128d)(mask), (s))\n" |
| 8014 | "\n" |
| 8015 | "#define _mm256_mask_i32gather_pd(a, m, i, mask, s) \\\n" |
| 8016 | " (__m256d)__builtin_ia32_gatherd_pd256((__v4df)(__m256d)(a), \\\n" |
| 8017 | " (double const *)(m), \\\n" |
| 8018 | " (__v4si)(__m128i)(i), \\\n" |
| 8019 | " (__v4df)(__m256d)(mask), (s))\n" |
| 8020 | "\n" |
| 8021 | "#define _mm_mask_i64gather_pd(a, m, i, mask, s) \\\n" |
| 8022 | " (__m128d)__builtin_ia32_gatherq_pd((__v2df)(__m128d)(a), \\\n" |
| 8023 | " (double const *)(m), \\\n" |
| 8024 | " (__v2di)(__m128i)(i), \\\n" |
| 8025 | " (__v2df)(__m128d)(mask), (s))\n" |
| 8026 | "\n" |
| 8027 | "#define _mm256_mask_i64gather_pd(a, m, i, mask, s) \\\n" |
| 8028 | " (__m256d)__builtin_ia32_gatherq_pd256((__v4df)(__m256d)(a), \\\n" |
| 8029 | " (double const *)(m), \\\n" |
| 8030 | " (__v4di)(__m256i)(i), \\\n" |
| 8031 | " (__v4df)(__m256d)(mask), (s))\n" |
| 8032 | "\n" |
| 8033 | "#define _mm_mask_i32gather_ps(a, m, i, mask, s) \\\n" |
| 8034 | " (__m128)__builtin_ia32_gatherd_ps((__v4sf)(__m128)(a), \\\n" |
| 8035 | " (float const *)(m), \\\n" |
| 8036 | " (__v4si)(__m128i)(i), \\\n" |
| 8037 | " (__v4sf)(__m128)(mask), (s))\n" |
| 8038 | "\n" |
| 8039 | "#define _mm256_mask_i32gather_ps(a, m, i, mask, s) \\\n" |
| 8040 | " (__m256)__builtin_ia32_gatherd_ps256((__v8sf)(__m256)(a), \\\n" |
| 8041 | " (float const *)(m), \\\n" |
| 8042 | " (__v8si)(__m256i)(i), \\\n" |
| 8043 | " (__v8sf)(__m256)(mask), (s))\n" |
| 8044 | "\n" |
| 8045 | "#define _mm_mask_i64gather_ps(a, m, i, mask, s) \\\n" |
| 8046 | " (__m128)__builtin_ia32_gatherq_ps((__v4sf)(__m128)(a), \\\n" |
| 8047 | " (float const *)(m), \\\n" |
| 8048 | " (__v2di)(__m128i)(i), \\\n" |
| 8049 | " (__v4sf)(__m128)(mask), (s))\n" |
| 8050 | "\n" |
| 8051 | "#define _mm256_mask_i64gather_ps(a, m, i, mask, s) \\\n" |
| 8052 | " (__m128)__builtin_ia32_gatherq_ps256((__v4sf)(__m128)(a), \\\n" |
| 8053 | " (float const *)(m), \\\n" |
| 8054 | " (__v4di)(__m256i)(i), \\\n" |
| 8055 | " (__v4sf)(__m128)(mask), (s))\n" |
| 8056 | "\n" |
| 8057 | "#define _mm_mask_i32gather_epi32(a, m, i, mask, s) \\\n" |
| 8058 | " (__m128i)__builtin_ia32_gatherd_d((__v4si)(__m128i)(a), \\\n" |
| 8059 | " (int const *)(m), \\\n" |
| 8060 | " (__v4si)(__m128i)(i), \\\n" |
| 8061 | " (__v4si)(__m128i)(mask), (s))\n" |
| 8062 | "\n" |
| 8063 | "#define _mm256_mask_i32gather_epi32(a, m, i, mask, s) \\\n" |
| 8064 | " (__m256i)__builtin_ia32_gatherd_d256((__v8si)(__m256i)(a), \\\n" |
| 8065 | " (int const *)(m), \\\n" |
| 8066 | " (__v8si)(__m256i)(i), \\\n" |
| 8067 | " (__v8si)(__m256i)(mask), (s))\n" |
| 8068 | "\n" |
| 8069 | "#define _mm_mask_i64gather_epi32(a, m, i, mask, s) \\\n" |
| 8070 | " (__m128i)__builtin_ia32_gatherq_d((__v4si)(__m128i)(a), \\\n" |
| 8071 | " (int const *)(m), \\\n" |
| 8072 | " (__v2di)(__m128i)(i), \\\n" |
| 8073 | " (__v4si)(__m128i)(mask), (s))\n" |
| 8074 | "\n" |
| 8075 | "#define _mm256_mask_i64gather_epi32(a, m, i, mask, s) \\\n" |
| 8076 | " (__m128i)__builtin_ia32_gatherq_d256((__v4si)(__m128i)(a), \\\n" |
| 8077 | " (int const *)(m), \\\n" |
| 8078 | " (__v4di)(__m256i)(i), \\\n" |
| 8079 | " (__v4si)(__m128i)(mask), (s))\n" |
| 8080 | "\n" |
| 8081 | "#define _mm_mask_i32gather_epi64(a, m, i, mask, s) \\\n" |
| 8082 | " (__m128i)__builtin_ia32_gatherd_q((__v2di)(__m128i)(a), \\\n" |
| 8083 | " (long long const *)(m), \\\n" |
| 8084 | " (__v4si)(__m128i)(i), \\\n" |
| 8085 | " (__v2di)(__m128i)(mask), (s))\n" |
| 8086 | "\n" |
| 8087 | "#define _mm256_mask_i32gather_epi64(a, m, i, mask, s) \\\n" |
| 8088 | " (__m256i)__builtin_ia32_gatherd_q256((__v4di)(__m256i)(a), \\\n" |
| 8089 | " (long long const *)(m), \\\n" |
| 8090 | " (__v4si)(__m128i)(i), \\\n" |
| 8091 | " (__v4di)(__m256i)(mask), (s))\n" |
| 8092 | "\n" |
| 8093 | "#define _mm_mask_i64gather_epi64(a, m, i, mask, s) \\\n" |
| 8094 | " (__m128i)__builtin_ia32_gatherq_q((__v2di)(__m128i)(a), \\\n" |
| 8095 | " (long long const *)(m), \\\n" |
| 8096 | " (__v2di)(__m128i)(i), \\\n" |
| 8097 | " (__v2di)(__m128i)(mask), (s))\n" |
| 8098 | "\n" |
| 8099 | "#define _mm256_mask_i64gather_epi64(a, m, i, mask, s) \\\n" |
| 8100 | " (__m256i)__builtin_ia32_gatherq_q256((__v4di)(__m256i)(a), \\\n" |
| 8101 | " (long long const *)(m), \\\n" |
| 8102 | " (__v4di)(__m256i)(i), \\\n" |
| 8103 | " (__v4di)(__m256i)(mask), (s))\n" |
| 8104 | "\n" |
| 8105 | "#define _mm_i32gather_pd(m, i, s) \\\n" |
| 8106 | " (__m128d)__builtin_ia32_gatherd_pd((__v2df)_mm_undefined_pd(), \\\n" |
| 8107 | " (double const *)(m), \\\n" |
| 8108 | " (__v4si)(__m128i)(i), \\\n" |
| 8109 | " (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \\\n" |
| 8110 | " _mm_setzero_pd()), \\\n" |
| 8111 | " (s))\n" |
| 8112 | "\n" |
| 8113 | "#define _mm256_i32gather_pd(m, i, s) \\\n" |
| 8114 | " (__m256d)__builtin_ia32_gatherd_pd256((__v4df)_mm256_undefined_pd(), \\\n" |
| 8115 | " (double const *)(m), \\\n" |
| 8116 | " (__v4si)(__m128i)(i), \\\n" |
| 8117 | " (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \\\n" |
| 8118 | " _mm256_setzero_pd(), \\\n" |
| 8119 | " _CMP_EQ_OQ), \\\n" |
| 8120 | " (s))\n" |
| 8121 | "\n" |
| 8122 | "#define _mm_i64gather_pd(m, i, s) \\\n" |
| 8123 | " (__m128d)__builtin_ia32_gatherq_pd((__v2df)_mm_undefined_pd(), \\\n" |
| 8124 | " (double const *)(m), \\\n" |
| 8125 | " (__v2di)(__m128i)(i), \\\n" |
| 8126 | " (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \\\n" |
| 8127 | " _mm_setzero_pd()), \\\n" |
| 8128 | " (s))\n" |
| 8129 | "\n" |
| 8130 | "#define _mm256_i64gather_pd(m, i, s) \\\n" |
| 8131 | " (__m256d)__builtin_ia32_gatherq_pd256((__v4df)_mm256_undefined_pd(), \\\n" |
| 8132 | " (double const *)(m), \\\n" |
| 8133 | " (__v4di)(__m256i)(i), \\\n" |
| 8134 | " (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \\\n" |
| 8135 | " _mm256_setzero_pd(), \\\n" |
| 8136 | " _CMP_EQ_OQ), \\\n" |
| 8137 | " (s))\n" |
| 8138 | "\n" |
| 8139 | "#define _mm_i32gather_ps(m, i, s) \\\n" |
| 8140 | " (__m128)__builtin_ia32_gatherd_ps((__v4sf)_mm_undefined_ps(), \\\n" |
| 8141 | " (float const *)(m), \\\n" |
| 8142 | " (__v4si)(__m128i)(i), \\\n" |
| 8143 | " (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \\\n" |
| 8144 | " _mm_setzero_ps()), \\\n" |
| 8145 | " (s))\n" |
| 8146 | "\n" |
| 8147 | "#define _mm256_i32gather_ps(m, i, s) \\\n" |
| 8148 | " (__m256)__builtin_ia32_gatherd_ps256((__v8sf)_mm256_undefined_ps(), \\\n" |
| 8149 | " (float const *)(m), \\\n" |
| 8150 | " (__v8si)(__m256i)(i), \\\n" |
| 8151 | " (__v8sf)_mm256_cmp_ps(_mm256_setzero_ps(), \\\n" |
| 8152 | " _mm256_setzero_ps(), \\\n" |
| 8153 | " _CMP_EQ_OQ), \\\n" |
| 8154 | " (s))\n" |
| 8155 | "\n" |
| 8156 | "#define _mm_i64gather_ps(m, i, s) \\\n" |
| 8157 | " (__m128)__builtin_ia32_gatherq_ps((__v4sf)_mm_undefined_ps(), \\\n" |
| 8158 | " (float const *)(m), \\\n" |
| 8159 | " (__v2di)(__m128i)(i), \\\n" |
| 8160 | " (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \\\n" |
| 8161 | " _mm_setzero_ps()), \\\n" |
| 8162 | " (s))\n" |
| 8163 | "\n" |
| 8164 | "#define _mm256_i64gather_ps(m, i, s) \\\n" |
| 8165 | " (__m128)__builtin_ia32_gatherq_ps256((__v4sf)_mm_undefined_ps(), \\\n" |
| 8166 | " (float const *)(m), \\\n" |
| 8167 | " (__v4di)(__m256i)(i), \\\n" |
| 8168 | " (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \\\n" |
| 8169 | " _mm_setzero_ps()), \\\n" |
| 8170 | " (s))\n" |
| 8171 | "\n" |
| 8172 | "#define _mm_i32gather_epi32(m, i, s) \\\n" |
| 8173 | " (__m128i)__builtin_ia32_gatherd_d((__v4si)_mm_undefined_si128(), \\\n" |
| 8174 | " (int const *)(m), (__v4si)(__m128i)(i), \\\n" |
| 8175 | " (__v4si)_mm_set1_epi32(-1), (s))\n" |
| 8176 | "\n" |
| 8177 | "#define _mm256_i32gather_epi32(m, i, s) \\\n" |
| 8178 | " (__m256i)__builtin_ia32_gatherd_d256((__v8si)_mm256_undefined_si256(), \\\n" |
| 8179 | " (int const *)(m), (__v8si)(__m256i)(i), \\\n" |
| 8180 | " (__v8si)_mm256_set1_epi32(-1), (s))\n" |
| 8181 | "\n" |
| 8182 | "#define _mm_i64gather_epi32(m, i, s) \\\n" |
| 8183 | " (__m128i)__builtin_ia32_gatherq_d((__v4si)_mm_undefined_si128(), \\\n" |
| 8184 | " (int const *)(m), (__v2di)(__m128i)(i), \\\n" |
| 8185 | " (__v4si)_mm_set1_epi32(-1), (s))\n" |
| 8186 | "\n" |
| 8187 | "#define _mm256_i64gather_epi32(m, i, s) \\\n" |
| 8188 | " (__m128i)__builtin_ia32_gatherq_d256((__v4si)_mm_undefined_si128(), \\\n" |
| 8189 | " (int const *)(m), (__v4di)(__m256i)(i), \\\n" |
| 8190 | " (__v4si)_mm_set1_epi32(-1), (s))\n" |
| 8191 | "\n" |
| 8192 | "#define _mm_i32gather_epi64(m, i, s) \\\n" |
| 8193 | " (__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_undefined_si128(), \\\n" |
| 8194 | " (long long const *)(m), \\\n" |
| 8195 | " (__v4si)(__m128i)(i), \\\n" |
| 8196 | " (__v2di)_mm_set1_epi64x(-1), (s))\n" |
| 8197 | "\n" |
| 8198 | "#define _mm256_i32gather_epi64(m, i, s) \\\n" |
| 8199 | " (__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_undefined_si256(), \\\n" |
| 8200 | " (long long const *)(m), \\\n" |
| 8201 | " (__v4si)(__m128i)(i), \\\n" |
| 8202 | " (__v4di)_mm256_set1_epi64x(-1), (s))\n" |
| 8203 | "\n" |
| 8204 | "#define _mm_i64gather_epi64(m, i, s) \\\n" |
| 8205 | " (__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_undefined_si128(), \\\n" |
| 8206 | " (long long const *)(m), \\\n" |
| 8207 | " (__v2di)(__m128i)(i), \\\n" |
| 8208 | " (__v2di)_mm_set1_epi64x(-1), (s))\n" |
| 8209 | "\n" |
| 8210 | "#define _mm256_i64gather_epi64(m, i, s) \\\n" |
| 8211 | " (__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_undefined_si256(), \\\n" |
| 8212 | " (long long const *)(m), \\\n" |
| 8213 | " (__v4di)(__m256i)(i), \\\n" |
| 8214 | " (__v4di)_mm256_set1_epi64x(-1), (s))\n" |
| 8215 | "\n" |
| 8216 | "#undef __DEFAULT_FN_ATTRS256\n" |
| 8217 | "#undef __DEFAULT_FN_ATTRS128\n" |
| 8218 | "\n" |
| 8219 | "#endif /* __AVX2INTRIN_H */\n" |
| 8220 | "" } , |
| 8221 | { "/builtins/avxintrin.h" , "/*===---- avxintrin.h - AVX intrinsics -------------------------------------===\n" |
| 8222 | " *\n" |
| 8223 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 8224 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 8225 | " * in the Software without restriction, including without limitation the rights\n" |
| 8226 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 8227 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 8228 | " * furnished to do so, subject to the following conditions:\n" |
| 8229 | " *\n" |
| 8230 | " * The above copyright notice and this permission notice shall be included in\n" |
| 8231 | " * all copies or substantial portions of the Software.\n" |
| 8232 | " *\n" |
| 8233 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 8234 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 8235 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 8236 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 8237 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 8238 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 8239 | " * THE SOFTWARE.\n" |
| 8240 | " *\n" |
| 8241 | " *===-----------------------------------------------------------------------===\n" |
| 8242 | " */\n" |
| 8243 | "\n" |
| 8244 | "#ifndef __IMMINTRIN_H\n" |
| 8245 | "#error \"Never use <avxintrin.h> directly; include <immintrin.h> instead.\"\n" |
| 8246 | "#endif\n" |
| 8247 | "\n" |
| 8248 | "#ifndef __AVXINTRIN_H\n" |
| 8249 | "#define __AVXINTRIN_H\n" |
| 8250 | "\n" |
| 8251 | "typedef double __v4df __attribute__ ((__vector_size__ (32)));\n" |
| 8252 | "typedef float __v8sf __attribute__ ((__vector_size__ (32)));\n" |
| 8253 | "typedef long long __v4di __attribute__ ((__vector_size__ (32)));\n" |
| 8254 | "typedef int __v8si __attribute__ ((__vector_size__ (32)));\n" |
| 8255 | "typedef short __v16hi __attribute__ ((__vector_size__ (32)));\n" |
| 8256 | "typedef char __v32qi __attribute__ ((__vector_size__ (32)));\n" |
| 8257 | "\n" |
| 8258 | "/* Unsigned types */\n" |
| 8259 | "typedef unsigned long long __v4du __attribute__ ((__vector_size__ (32)));\n" |
| 8260 | "typedef unsigned int __v8su __attribute__ ((__vector_size__ (32)));\n" |
| 8261 | "typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32)));\n" |
| 8262 | "typedef unsigned char __v32qu __attribute__ ((__vector_size__ (32)));\n" |
| 8263 | "\n" |
| 8264 | "/* We need an explicitly signed variant for char. Note that this shouldn't\n" |
| 8265 | " * appear in the interface though. */\n" |
| 8266 | "typedef signed char __v32qs __attribute__((__vector_size__(32)));\n" |
| 8267 | "\n" |
| 8268 | "typedef float __m256 __attribute__ ((__vector_size__ (32)));\n" |
| 8269 | "typedef double __m256d __attribute__((__vector_size__(32)));\n" |
| 8270 | "typedef long long __m256i __attribute__((__vector_size__(32)));\n" |
| 8271 | "\n" |
| 8272 | "/* Define the default attributes for the functions in this file. */\n" |
| 8273 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"avx\"), __min_vector_width__(256)))\n" |
| 8274 | "#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__(\"avx\"), __min_vector_width__(128)))\n" |
| 8275 | "\n" |
| 8276 | "/* Arithmetic */\n" |
| 8277 | "/// Adds two 256-bit vectors of [4 x double].\n" |
| 8278 | "///\n" |
| 8279 | "/// \\headerfile <x86intrin.h>\n" |
| 8280 | "///\n" |
| 8281 | "/// This intrinsic corresponds to the <c> VADDPD </c> instruction.\n" |
| 8282 | "///\n" |
| 8283 | "/// \\param __a\n" |
| 8284 | "/// A 256-bit vector of [4 x double] containing one of the source operands.\n" |
| 8285 | "/// \\param __b\n" |
| 8286 | "/// A 256-bit vector of [4 x double] containing one of the source operands.\n" |
| 8287 | "/// \\returns A 256-bit vector of [4 x double] containing the sums of both\n" |
| 8288 | "/// operands.\n" |
| 8289 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 8290 | "_mm256_add_pd(__m256d __a, __m256d __b)\n" |
| 8291 | "{\n" |
| 8292 | " return (__m256d)((__v4df)__a+(__v4df)__b);\n" |
| 8293 | "}\n" |
| 8294 | "\n" |
| 8295 | "/// Adds two 256-bit vectors of [8 x float].\n" |
| 8296 | "///\n" |
| 8297 | "/// \\headerfile <x86intrin.h>\n" |
| 8298 | "///\n" |
| 8299 | "/// This intrinsic corresponds to the <c> VADDPS </c> instruction.\n" |
| 8300 | "///\n" |
| 8301 | "/// \\param __a\n" |
| 8302 | "/// A 256-bit vector of [8 x float] containing one of the source operands.\n" |
| 8303 | "/// \\param __b\n" |
| 8304 | "/// A 256-bit vector of [8 x float] containing one of the source operands.\n" |
| 8305 | "/// \\returns A 256-bit vector of [8 x float] containing the sums of both\n" |
| 8306 | "/// operands.\n" |
| 8307 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 8308 | "_mm256_add_ps(__m256 __a, __m256 __b)\n" |
| 8309 | "{\n" |
| 8310 | " return (__m256)((__v8sf)__a+(__v8sf)__b);\n" |
| 8311 | "}\n" |
| 8312 | "\n" |
| 8313 | "/// Subtracts two 256-bit vectors of [4 x double].\n" |
| 8314 | "///\n" |
| 8315 | "/// \\headerfile <x86intrin.h>\n" |
| 8316 | "///\n" |
| 8317 | "/// This intrinsic corresponds to the <c> VSUBPD </c> instruction.\n" |
| 8318 | "///\n" |
| 8319 | "/// \\param __a\n" |
| 8320 | "/// A 256-bit vector of [4 x double] containing the minuend.\n" |
| 8321 | "/// \\param __b\n" |
| 8322 | "/// A 256-bit vector of [4 x double] containing the subtrahend.\n" |
| 8323 | "/// \\returns A 256-bit vector of [4 x double] containing the differences between\n" |
| 8324 | "/// both operands.\n" |
| 8325 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 8326 | "_mm256_sub_pd(__m256d __a, __m256d __b)\n" |
| 8327 | "{\n" |
| 8328 | " return (__m256d)((__v4df)__a-(__v4df)__b);\n" |
| 8329 | "}\n" |
| 8330 | "\n" |
| 8331 | "/// Subtracts two 256-bit vectors of [8 x float].\n" |
| 8332 | "///\n" |
| 8333 | "/// \\headerfile <x86intrin.h>\n" |
| 8334 | "///\n" |
| 8335 | "/// This intrinsic corresponds to the <c> VSUBPS </c> instruction.\n" |
| 8336 | "///\n" |
| 8337 | "/// \\param __a\n" |
| 8338 | "/// A 256-bit vector of [8 x float] containing the minuend.\n" |
| 8339 | "/// \\param __b\n" |
| 8340 | "/// A 256-bit vector of [8 x float] containing the subtrahend.\n" |
| 8341 | "/// \\returns A 256-bit vector of [8 x float] containing the differences between\n" |
| 8342 | "/// both operands.\n" |
| 8343 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 8344 | "_mm256_sub_ps(__m256 __a, __m256 __b)\n" |
| 8345 | "{\n" |
| 8346 | " return (__m256)((__v8sf)__a-(__v8sf)__b);\n" |
| 8347 | "}\n" |
| 8348 | "\n" |
| 8349 | "/// Adds the even-indexed values and subtracts the odd-indexed values of\n" |
| 8350 | "/// two 256-bit vectors of [4 x double].\n" |
| 8351 | "///\n" |
| 8352 | "/// \\headerfile <x86intrin.h>\n" |
| 8353 | "///\n" |
| 8354 | "/// This intrinsic corresponds to the <c> VADDSUBPD </c> instruction.\n" |
| 8355 | "///\n" |
| 8356 | "/// \\param __a\n" |
| 8357 | "/// A 256-bit vector of [4 x double] containing the left source operand.\n" |
| 8358 | "/// \\param __b\n" |
| 8359 | "/// A 256-bit vector of [4 x double] containing the right source operand.\n" |
| 8360 | "/// \\returns A 256-bit vector of [4 x double] containing the alternating sums\n" |
| 8361 | "/// and differences between both operands.\n" |
| 8362 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 8363 | "_mm256_addsub_pd(__m256d __a, __m256d __b)\n" |
| 8364 | "{\n" |
| 8365 | " return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b);\n" |
| 8366 | "}\n" |
| 8367 | "\n" |
| 8368 | "/// Adds the even-indexed values and subtracts the odd-indexed values of\n" |
| 8369 | "/// two 256-bit vectors of [8 x float].\n" |
| 8370 | "///\n" |
| 8371 | "/// \\headerfile <x86intrin.h>\n" |
| 8372 | "///\n" |
| 8373 | "/// This intrinsic corresponds to the <c> VADDSUBPS </c> instruction.\n" |
| 8374 | "///\n" |
| 8375 | "/// \\param __a\n" |
| 8376 | "/// A 256-bit vector of [8 x float] containing the left source operand.\n" |
| 8377 | "/// \\param __b\n" |
| 8378 | "/// A 256-bit vector of [8 x float] containing the right source operand.\n" |
| 8379 | "/// \\returns A 256-bit vector of [8 x float] containing the alternating sums and\n" |
| 8380 | "/// differences between both operands.\n" |
| 8381 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 8382 | "_mm256_addsub_ps(__m256 __a, __m256 __b)\n" |
| 8383 | "{\n" |
| 8384 | " return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b);\n" |
| 8385 | "}\n" |
| 8386 | "\n" |
| 8387 | "/// Divides two 256-bit vectors of [4 x double].\n" |
| 8388 | "///\n" |
| 8389 | "/// \\headerfile <x86intrin.h>\n" |
| 8390 | "///\n" |
| 8391 | "/// This intrinsic corresponds to the <c> VDIVPD </c> instruction.\n" |
| 8392 | "///\n" |
| 8393 | "/// \\param __a\n" |
| 8394 | "/// A 256-bit vector of [4 x double] containing the dividend.\n" |
| 8395 | "/// \\param __b\n" |
| 8396 | "/// A 256-bit vector of [4 x double] containing the divisor.\n" |
| 8397 | "/// \\returns A 256-bit vector of [4 x double] containing the quotients of both\n" |
| 8398 | "/// operands.\n" |
| 8399 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 8400 | "_mm256_div_pd(__m256d __a, __m256d __b)\n" |
| 8401 | "{\n" |
| 8402 | " return (__m256d)((__v4df)__a/(__v4df)__b);\n" |
| 8403 | "}\n" |
| 8404 | "\n" |
| 8405 | "/// Divides two 256-bit vectors of [8 x float].\n" |
| 8406 | "///\n" |
| 8407 | "/// \\headerfile <x86intrin.h>\n" |
| 8408 | "///\n" |
| 8409 | "/// This intrinsic corresponds to the <c> VDIVPS </c> instruction.\n" |
| 8410 | "///\n" |
| 8411 | "/// \\param __a\n" |
| 8412 | "/// A 256-bit vector of [8 x float] containing the dividend.\n" |
| 8413 | "/// \\param __b\n" |
| 8414 | "/// A 256-bit vector of [8 x float] containing the divisor.\n" |
| 8415 | "/// \\returns A 256-bit vector of [8 x float] containing the quotients of both\n" |
| 8416 | "/// operands.\n" |
| 8417 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 8418 | "_mm256_div_ps(__m256 __a, __m256 __b)\n" |
| 8419 | "{\n" |
| 8420 | " return (__m256)((__v8sf)__a/(__v8sf)__b);\n" |
| 8421 | "}\n" |
| 8422 | "\n" |
| 8423 | "/// Compares two 256-bit vectors of [4 x double] and returns the greater\n" |
| 8424 | "/// of each pair of values.\n" |
| 8425 | "///\n" |
| 8426 | "/// \\headerfile <x86intrin.h>\n" |
| 8427 | "///\n" |
| 8428 | "/// This intrinsic corresponds to the <c> VMAXPD </c> instruction.\n" |
| 8429 | "///\n" |
| 8430 | "/// \\param __a\n" |
| 8431 | "/// A 256-bit vector of [4 x double] containing one of the operands.\n" |
| 8432 | "/// \\param __b\n" |
| 8433 | "/// A 256-bit vector of [4 x double] containing one of the operands.\n" |
| 8434 | "/// \\returns A 256-bit vector of [4 x double] containing the maximum values\n" |
| 8435 | "/// between both operands.\n" |
| 8436 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 8437 | "_mm256_max_pd(__m256d __a, __m256d __b)\n" |
| 8438 | "{\n" |
| 8439 | " return (__m256d)__builtin_ia32_maxpd256((__v4df)__a, (__v4df)__b);\n" |
| 8440 | "}\n" |
| 8441 | "\n" |
| 8442 | "/// Compares two 256-bit vectors of [8 x float] and returns the greater\n" |
| 8443 | "/// of each pair of values.\n" |
| 8444 | "///\n" |
| 8445 | "/// \\headerfile <x86intrin.h>\n" |
| 8446 | "///\n" |
| 8447 | "/// This intrinsic corresponds to the <c> VMAXPS </c> instruction.\n" |
| 8448 | "///\n" |
| 8449 | "/// \\param __a\n" |
| 8450 | "/// A 256-bit vector of [8 x float] containing one of the operands.\n" |
| 8451 | "/// \\param __b\n" |
| 8452 | "/// A 256-bit vector of [8 x float] containing one of the operands.\n" |
| 8453 | "/// \\returns A 256-bit vector of [8 x float] containing the maximum values\n" |
| 8454 | "/// between both operands.\n" |
| 8455 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 8456 | "_mm256_max_ps(__m256 __a, __m256 __b)\n" |
| 8457 | "{\n" |
| 8458 | " return (__m256)__builtin_ia32_maxps256((__v8sf)__a, (__v8sf)__b);\n" |
| 8459 | "}\n" |
| 8460 | "\n" |
| 8461 | "/// Compares two 256-bit vectors of [4 x double] and returns the lesser\n" |
| 8462 | "/// of each pair of values.\n" |
| 8463 | "///\n" |
| 8464 | "/// \\headerfile <x86intrin.h>\n" |
| 8465 | "///\n" |
| 8466 | "/// This intrinsic corresponds to the <c> VMINPD </c> instruction.\n" |
| 8467 | "///\n" |
| 8468 | "/// \\param __a\n" |
| 8469 | "/// A 256-bit vector of [4 x double] containing one of the operands.\n" |
| 8470 | "/// \\param __b\n" |
| 8471 | "/// A 256-bit vector of [4 x double] containing one of the operands.\n" |
| 8472 | "/// \\returns A 256-bit vector of [4 x double] containing the minimum values\n" |
| 8473 | "/// between both operands.\n" |
| 8474 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 8475 | "_mm256_min_pd(__m256d __a, __m256d __b)\n" |
| 8476 | "{\n" |
| 8477 | " return (__m256d)__builtin_ia32_minpd256((__v4df)__a, (__v4df)__b);\n" |
| 8478 | "}\n" |
| 8479 | "\n" |
| 8480 | "/// Compares two 256-bit vectors of [8 x float] and returns the lesser\n" |
| 8481 | "/// of each pair of values.\n" |
| 8482 | "///\n" |
| 8483 | "/// \\headerfile <x86intrin.h>\n" |
| 8484 | "///\n" |
| 8485 | "/// This intrinsic corresponds to the <c> VMINPS </c> instruction.\n" |
| 8486 | "///\n" |
| 8487 | "/// \\param __a\n" |
| 8488 | "/// A 256-bit vector of [8 x float] containing one of the operands.\n" |
| 8489 | "/// \\param __b\n" |
| 8490 | "/// A 256-bit vector of [8 x float] containing one of the operands.\n" |
| 8491 | "/// \\returns A 256-bit vector of [8 x float] containing the minimum values\n" |
| 8492 | "/// between both operands.\n" |
| 8493 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 8494 | "_mm256_min_ps(__m256 __a, __m256 __b)\n" |
| 8495 | "{\n" |
| 8496 | " return (__m256)__builtin_ia32_minps256((__v8sf)__a, (__v8sf)__b);\n" |
| 8497 | "}\n" |
| 8498 | "\n" |
| 8499 | "/// Multiplies two 256-bit vectors of [4 x double].\n" |
| 8500 | "///\n" |
| 8501 | "/// \\headerfile <x86intrin.h>\n" |
| 8502 | "///\n" |
| 8503 | "/// This intrinsic corresponds to the <c> VMULPD </c> instruction.\n" |
| 8504 | "///\n" |
| 8505 | "/// \\param __a\n" |
| 8506 | "/// A 256-bit vector of [4 x double] containing one of the operands.\n" |
| 8507 | "/// \\param __b\n" |
| 8508 | "/// A 256-bit vector of [4 x double] containing one of the operands.\n" |
| 8509 | "/// \\returns A 256-bit vector of [4 x double] containing the products of both\n" |
| 8510 | "/// operands.\n" |
| 8511 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 8512 | "_mm256_mul_pd(__m256d __a, __m256d __b)\n" |
| 8513 | "{\n" |
| 8514 | " return (__m256d)((__v4df)__a * (__v4df)__b);\n" |
| 8515 | "}\n" |
| 8516 | "\n" |
| 8517 | "/// Multiplies two 256-bit vectors of [8 x float].\n" |
| 8518 | "///\n" |
| 8519 | "/// \\headerfile <x86intrin.h>\n" |
| 8520 | "///\n" |
| 8521 | "/// This intrinsic corresponds to the <c> VMULPS </c> instruction.\n" |
| 8522 | "///\n" |
| 8523 | "/// \\param __a\n" |
| 8524 | "/// A 256-bit vector of [8 x float] containing one of the operands.\n" |
| 8525 | "/// \\param __b\n" |
| 8526 | "/// A 256-bit vector of [8 x float] containing one of the operands.\n" |
| 8527 | "/// \\returns A 256-bit vector of [8 x float] containing the products of both\n" |
| 8528 | "/// operands.\n" |
| 8529 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 8530 | "_mm256_mul_ps(__m256 __a, __m256 __b)\n" |
| 8531 | "{\n" |
| 8532 | " return (__m256)((__v8sf)__a * (__v8sf)__b);\n" |
| 8533 | "}\n" |
| 8534 | "\n" |
| 8535 | "/// Calculates the square roots of the values in a 256-bit vector of\n" |
| 8536 | "/// [4 x double].\n" |
| 8537 | "///\n" |
| 8538 | "/// \\headerfile <x86intrin.h>\n" |
| 8539 | "///\n" |
| 8540 | "/// This intrinsic corresponds to the <c> VSQRTPD </c> instruction.\n" |
| 8541 | "///\n" |
| 8542 | "/// \\param __a\n" |
| 8543 | "/// A 256-bit vector of [4 x double].\n" |
| 8544 | "/// \\returns A 256-bit vector of [4 x double] containing the square roots of the\n" |
| 8545 | "/// values in the operand.\n" |
| 8546 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 8547 | "_mm256_sqrt_pd(__m256d __a)\n" |
| 8548 | "{\n" |
| 8549 | " return (__m256d)__builtin_ia32_sqrtpd256((__v4df)__a);\n" |
| 8550 | "}\n" |
| 8551 | "\n" |
| 8552 | "/// Calculates the square roots of the values in a 256-bit vector of\n" |
| 8553 | "/// [8 x float].\n" |
| 8554 | "///\n" |
| 8555 | "/// \\headerfile <x86intrin.h>\n" |
| 8556 | "///\n" |
| 8557 | "/// This intrinsic corresponds to the <c> VSQRTPS </c> instruction.\n" |
| 8558 | "///\n" |
| 8559 | "/// \\param __a\n" |
| 8560 | "/// A 256-bit vector of [8 x float].\n" |
| 8561 | "/// \\returns A 256-bit vector of [8 x float] containing the square roots of the\n" |
| 8562 | "/// values in the operand.\n" |
| 8563 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 8564 | "_mm256_sqrt_ps(__m256 __a)\n" |
| 8565 | "{\n" |
| 8566 | " return (__m256)__builtin_ia32_sqrtps256((__v8sf)__a);\n" |
| 8567 | "}\n" |
| 8568 | "\n" |
| 8569 | "/// Calculates the reciprocal square roots of the values in a 256-bit\n" |
| 8570 | "/// vector of [8 x float].\n" |
| 8571 | "///\n" |
| 8572 | "/// \\headerfile <x86intrin.h>\n" |
| 8573 | "///\n" |
| 8574 | "/// This intrinsic corresponds to the <c> VRSQRTPS </c> instruction.\n" |
| 8575 | "///\n" |
| 8576 | "/// \\param __a\n" |
| 8577 | "/// A 256-bit vector of [8 x float].\n" |
| 8578 | "/// \\returns A 256-bit vector of [8 x float] containing the reciprocal square\n" |
| 8579 | "/// roots of the values in the operand.\n" |
| 8580 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 8581 | "_mm256_rsqrt_ps(__m256 __a)\n" |
| 8582 | "{\n" |
| 8583 | " return (__m256)__builtin_ia32_rsqrtps256((__v8sf)__a);\n" |
| 8584 | "}\n" |
| 8585 | "\n" |
| 8586 | "/// Calculates the reciprocals of the values in a 256-bit vector of\n" |
| 8587 | "/// [8 x float].\n" |
| 8588 | "///\n" |
| 8589 | "/// \\headerfile <x86intrin.h>\n" |
| 8590 | "///\n" |
| 8591 | "/// This intrinsic corresponds to the <c> VRCPPS </c> instruction.\n" |
| 8592 | "///\n" |
| 8593 | "/// \\param __a\n" |
| 8594 | "/// A 256-bit vector of [8 x float].\n" |
| 8595 | "/// \\returns A 256-bit vector of [8 x float] containing the reciprocals of the\n" |
| 8596 | "/// values in the operand.\n" |
| 8597 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 8598 | "_mm256_rcp_ps(__m256 __a)\n" |
| 8599 | "{\n" |
| 8600 | " return (__m256)__builtin_ia32_rcpps256((__v8sf)__a);\n" |
| 8601 | "}\n" |
| 8602 | "\n" |
| 8603 | "/// Rounds the values in a 256-bit vector of [4 x double] as specified\n" |
| 8604 | "/// by the byte operand. The source values are rounded to integer values and\n" |
| 8605 | "/// returned as 64-bit double-precision floating-point values.\n" |
| 8606 | "///\n" |
| 8607 | "/// \\headerfile <x86intrin.h>\n" |
| 8608 | "///\n" |
| 8609 | "/// \\code\n" |
| 8610 | "/// __m256d _mm256_round_pd(__m256d V, const int M);\n" |
| 8611 | "/// \\endcode\n" |
| 8612 | "///\n" |
| 8613 | "/// This intrinsic corresponds to the <c> VROUNDPD </c> instruction.\n" |
| 8614 | "///\n" |
| 8615 | "/// \\param V\n" |
| 8616 | "/// A 256-bit vector of [4 x double].\n" |
| 8617 | "/// \\param M\n" |
| 8618 | "/// An integer value that specifies the rounding operation. \\n\n" |
| 8619 | "/// Bits [7:4] are reserved. \\n\n" |
| 8620 | "/// Bit [3] is a precision exception value: \\n\n" |
| 8621 | "/// 0: A normal PE exception is used. \\n\n" |
| 8622 | "/// 1: The PE field is not updated. \\n\n" |
| 8623 | "/// Bit [2] is the rounding control source: \\n\n" |
| 8624 | "/// 0: Use bits [1:0] of \\a M. \\n\n" |
| 8625 | "/// 1: Use the current MXCSR setting. \\n\n" |
| 8626 | "/// Bits [1:0] contain the rounding control definition: \\n\n" |
| 8627 | "/// 00: Nearest. \\n\n" |
| 8628 | "/// 01: Downward (toward negative infinity). \\n\n" |
| 8629 | "/// 10: Upward (toward positive infinity). \\n\n" |
| 8630 | "/// 11: Truncated.\n" |
| 8631 | "/// \\returns A 256-bit vector of [4 x double] containing the rounded values.\n" |
| 8632 | "#define _mm256_round_pd(V, M) \\\n" |
| 8633 | " (__m256d)__builtin_ia32_roundpd256((__v4df)(__m256d)(V), (M))\n" |
| 8634 | "\n" |
| 8635 | "/// Rounds the values stored in a 256-bit vector of [8 x float] as\n" |
| 8636 | "/// specified by the byte operand. The source values are rounded to integer\n" |
| 8637 | "/// values and returned as floating-point values.\n" |
| 8638 | "///\n" |
| 8639 | "/// \\headerfile <x86intrin.h>\n" |
| 8640 | "///\n" |
| 8641 | "/// \\code\n" |
| 8642 | "/// __m256 _mm256_round_ps(__m256 V, const int M);\n" |
| 8643 | "/// \\endcode\n" |
| 8644 | "///\n" |
| 8645 | "/// This intrinsic corresponds to the <c> VROUNDPS </c> instruction.\n" |
| 8646 | "///\n" |
| 8647 | "/// \\param V\n" |
| 8648 | "/// A 256-bit vector of [8 x float].\n" |
| 8649 | "/// \\param M\n" |
| 8650 | "/// An integer value that specifies the rounding operation. \\n\n" |
| 8651 | "/// Bits [7:4] are reserved. \\n\n" |
| 8652 | "/// Bit [3] is a precision exception value: \\n\n" |
| 8653 | "/// 0: A normal PE exception is used. \\n\n" |
| 8654 | "/// 1: The PE field is not updated. \\n\n" |
| 8655 | "/// Bit [2] is the rounding control source: \\n\n" |
| 8656 | "/// 0: Use bits [1:0] of \\a M. \\n\n" |
| 8657 | "/// 1: Use the current MXCSR setting. \\n\n" |
| 8658 | "/// Bits [1:0] contain the rounding control definition: \\n\n" |
| 8659 | "/// 00: Nearest. \\n\n" |
| 8660 | "/// 01: Downward (toward negative infinity). \\n\n" |
| 8661 | "/// 10: Upward (toward positive infinity). \\n\n" |
| 8662 | "/// 11: Truncated.\n" |
| 8663 | "/// \\returns A 256-bit vector of [8 x float] containing the rounded values.\n" |
| 8664 | "#define _mm256_round_ps(V, M) \\\n" |
| 8665 | " (__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(V), (M))\n" |
| 8666 | "\n" |
| 8667 | "/// Rounds up the values stored in a 256-bit vector of [4 x double]. The\n" |
| 8668 | "/// source values are rounded up to integer values and returned as 64-bit\n" |
| 8669 | "/// double-precision floating-point values.\n" |
| 8670 | "///\n" |
| 8671 | "/// \\headerfile <x86intrin.h>\n" |
| 8672 | "///\n" |
| 8673 | "/// \\code\n" |
| 8674 | "/// __m256d _mm256_ceil_pd(__m256d V);\n" |
| 8675 | "/// \\endcode\n" |
| 8676 | "///\n" |
| 8677 | "/// This intrinsic corresponds to the <c> VROUNDPD </c> instruction.\n" |
| 8678 | "///\n" |
| 8679 | "/// \\param V\n" |
| 8680 | "/// A 256-bit vector of [4 x double].\n" |
| 8681 | "/// \\returns A 256-bit vector of [4 x double] containing the rounded up values.\n" |
| 8682 | "#define _mm256_ceil_pd(V) _mm256_round_pd((V), _MM_FROUND_CEIL)\n" |
| 8683 | "\n" |
| 8684 | "/// Rounds down the values stored in a 256-bit vector of [4 x double].\n" |
| 8685 | "/// The source values are rounded down to integer values and returned as\n" |
| 8686 | "/// 64-bit double-precision floating-point values.\n" |
| 8687 | "///\n" |
| 8688 | "/// \\headerfile <x86intrin.h>\n" |
| 8689 | "///\n" |
| 8690 | "/// \\code\n" |
| 8691 | "/// __m256d _mm256_floor_pd(__m256d V);\n" |
| 8692 | "/// \\endcode\n" |
| 8693 | "///\n" |
| 8694 | "/// This intrinsic corresponds to the <c> VROUNDPD </c> instruction.\n" |
| 8695 | "///\n" |
| 8696 | "/// \\param V\n" |
| 8697 | "/// A 256-bit vector of [4 x double].\n" |
| 8698 | "/// \\returns A 256-bit vector of [4 x double] containing the rounded down\n" |
| 8699 | "/// values.\n" |
| 8700 | "#define _mm256_floor_pd(V) _mm256_round_pd((V), _MM_FROUND_FLOOR)\n" |
| 8701 | "\n" |
| 8702 | "/// Rounds up the values stored in a 256-bit vector of [8 x float]. The\n" |
| 8703 | "/// source values are rounded up to integer values and returned as\n" |
| 8704 | "/// floating-point values.\n" |
| 8705 | "///\n" |
| 8706 | "/// \\headerfile <x86intrin.h>\n" |
| 8707 | "///\n" |
| 8708 | "/// \\code\n" |
| 8709 | "/// __m256 _mm256_ceil_ps(__m256 V);\n" |
| 8710 | "/// \\endcode\n" |
| 8711 | "///\n" |
| 8712 | "/// This intrinsic corresponds to the <c> VROUNDPS </c> instruction.\n" |
| 8713 | "///\n" |
| 8714 | "/// \\param V\n" |
| 8715 | "/// A 256-bit vector of [8 x float].\n" |
| 8716 | "/// \\returns A 256-bit vector of [8 x float] containing the rounded up values.\n" |
| 8717 | "#define _mm256_ceil_ps(V) _mm256_round_ps((V), _MM_FROUND_CEIL)\n" |
| 8718 | "\n" |
| 8719 | "/// Rounds down the values stored in a 256-bit vector of [8 x float]. The\n" |
| 8720 | "/// source values are rounded down to integer values and returned as\n" |
| 8721 | "/// floating-point values.\n" |
| 8722 | "///\n" |
| 8723 | "/// \\headerfile <x86intrin.h>\n" |
| 8724 | "///\n" |
| 8725 | "/// \\code\n" |
| 8726 | "/// __m256 _mm256_floor_ps(__m256 V);\n" |
| 8727 | "/// \\endcode\n" |
| 8728 | "///\n" |
| 8729 | "/// This intrinsic corresponds to the <c> VROUNDPS </c> instruction.\n" |
| 8730 | "///\n" |
| 8731 | "/// \\param V\n" |
| 8732 | "/// A 256-bit vector of [8 x float].\n" |
| 8733 | "/// \\returns A 256-bit vector of [8 x float] containing the rounded down values.\n" |
| 8734 | "#define _mm256_floor_ps(V) _mm256_round_ps((V), _MM_FROUND_FLOOR)\n" |
| 8735 | "\n" |
| 8736 | "/* Logical */\n" |
| 8737 | "/// Performs a bitwise AND of two 256-bit vectors of [4 x double].\n" |
| 8738 | "///\n" |
| 8739 | "/// \\headerfile <x86intrin.h>\n" |
| 8740 | "///\n" |
| 8741 | "/// This intrinsic corresponds to the <c> VANDPD </c> instruction.\n" |
| 8742 | "///\n" |
| 8743 | "/// \\param __a\n" |
| 8744 | "/// A 256-bit vector of [4 x double] containing one of the source operands.\n" |
| 8745 | "/// \\param __b\n" |
| 8746 | "/// A 256-bit vector of [4 x double] containing one of the source operands.\n" |
| 8747 | "/// \\returns A 256-bit vector of [4 x double] containing the bitwise AND of the\n" |
| 8748 | "/// values between both operands.\n" |
| 8749 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 8750 | "_mm256_and_pd(__m256d __a, __m256d __b)\n" |
| 8751 | "{\n" |
| 8752 | " return (__m256d)((__v4du)__a & (__v4du)__b);\n" |
| 8753 | "}\n" |
| 8754 | "\n" |
| 8755 | "/// Performs a bitwise AND of two 256-bit vectors of [8 x float].\n" |
| 8756 | "///\n" |
| 8757 | "/// \\headerfile <x86intrin.h>\n" |
| 8758 | "///\n" |
| 8759 | "/// This intrinsic corresponds to the <c> VANDPS </c> instruction.\n" |
| 8760 | "///\n" |
| 8761 | "/// \\param __a\n" |
| 8762 | "/// A 256-bit vector of [8 x float] containing one of the source operands.\n" |
| 8763 | "/// \\param __b\n" |
| 8764 | "/// A 256-bit vector of [8 x float] containing one of the source operands.\n" |
| 8765 | "/// \\returns A 256-bit vector of [8 x float] containing the bitwise AND of the\n" |
| 8766 | "/// values between both operands.\n" |
| 8767 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 8768 | "_mm256_and_ps(__m256 __a, __m256 __b)\n" |
| 8769 | "{\n" |
| 8770 | " return (__m256)((__v8su)__a & (__v8su)__b);\n" |
| 8771 | "}\n" |
| 8772 | "\n" |
| 8773 | "/// Performs a bitwise AND of two 256-bit vectors of [4 x double], using\n" |
| 8774 | "/// the one's complement of the values contained in the first source operand.\n" |
| 8775 | "///\n" |
| 8776 | "/// \\headerfile <x86intrin.h>\n" |
| 8777 | "///\n" |
| 8778 | "/// This intrinsic corresponds to the <c> VANDNPD </c> instruction.\n" |
| 8779 | "///\n" |
| 8780 | "/// \\param __a\n" |
| 8781 | "/// A 256-bit vector of [4 x double] containing the left source operand. The\n" |
| 8782 | "/// one's complement of this value is used in the bitwise AND.\n" |
| 8783 | "/// \\param __b\n" |
| 8784 | "/// A 256-bit vector of [4 x double] containing the right source operand.\n" |
| 8785 | "/// \\returns A 256-bit vector of [4 x double] containing the bitwise AND of the\n" |
| 8786 | "/// values of the second operand and the one's complement of the first\n" |
| 8787 | "/// operand.\n" |
| 8788 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 8789 | "_mm256_andnot_pd(__m256d __a, __m256d __b)\n" |
| 8790 | "{\n" |
| 8791 | " return (__m256d)(~(__v4du)__a & (__v4du)__b);\n" |
| 8792 | "}\n" |
| 8793 | "\n" |
| 8794 | "/// Performs a bitwise AND of two 256-bit vectors of [8 x float], using\n" |
| 8795 | "/// the one's complement of the values contained in the first source operand.\n" |
| 8796 | "///\n" |
| 8797 | "/// \\headerfile <x86intrin.h>\n" |
| 8798 | "///\n" |
| 8799 | "/// This intrinsic corresponds to the <c> VANDNPS </c> instruction.\n" |
| 8800 | "///\n" |
| 8801 | "/// \\param __a\n" |
| 8802 | "/// A 256-bit vector of [8 x float] containing the left source operand. The\n" |
| 8803 | "/// one's complement of this value is used in the bitwise AND.\n" |
| 8804 | "/// \\param __b\n" |
| 8805 | "/// A 256-bit vector of [8 x float] containing the right source operand.\n" |
| 8806 | "/// \\returns A 256-bit vector of [8 x float] containing the bitwise AND of the\n" |
| 8807 | "/// values of the second operand and the one's complement of the first\n" |
| 8808 | "/// operand.\n" |
| 8809 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 8810 | "_mm256_andnot_ps(__m256 __a, __m256 __b)\n" |
| 8811 | "{\n" |
| 8812 | " return (__m256)(~(__v8su)__a & (__v8su)__b);\n" |
| 8813 | "}\n" |
| 8814 | "\n" |
| 8815 | "/// Performs a bitwise OR of two 256-bit vectors of [4 x double].\n" |
| 8816 | "///\n" |
| 8817 | "/// \\headerfile <x86intrin.h>\n" |
| 8818 | "///\n" |
| 8819 | "/// This intrinsic corresponds to the <c> VORPD </c> instruction.\n" |
| 8820 | "///\n" |
| 8821 | "/// \\param __a\n" |
| 8822 | "/// A 256-bit vector of [4 x double] containing one of the source operands.\n" |
| 8823 | "/// \\param __b\n" |
| 8824 | "/// A 256-bit vector of [4 x double] containing one of the source operands.\n" |
| 8825 | "/// \\returns A 256-bit vector of [4 x double] containing the bitwise OR of the\n" |
| 8826 | "/// values between both operands.\n" |
| 8827 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 8828 | "_mm256_or_pd(__m256d __a, __m256d __b)\n" |
| 8829 | "{\n" |
| 8830 | " return (__m256d)((__v4du)__a | (__v4du)__b);\n" |
| 8831 | "}\n" |
| 8832 | "\n" |
| 8833 | "/// Performs a bitwise OR of two 256-bit vectors of [8 x float].\n" |
| 8834 | "///\n" |
| 8835 | "/// \\headerfile <x86intrin.h>\n" |
| 8836 | "///\n" |
| 8837 | "/// This intrinsic corresponds to the <c> VORPS </c> instruction.\n" |
| 8838 | "///\n" |
| 8839 | "/// \\param __a\n" |
| 8840 | "/// A 256-bit vector of [8 x float] containing one of the source operands.\n" |
| 8841 | "/// \\param __b\n" |
| 8842 | "/// A 256-bit vector of [8 x float] containing one of the source operands.\n" |
| 8843 | "/// \\returns A 256-bit vector of [8 x float] containing the bitwise OR of the\n" |
| 8844 | "/// values between both operands.\n" |
| 8845 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 8846 | "_mm256_or_ps(__m256 __a, __m256 __b)\n" |
| 8847 | "{\n" |
| 8848 | " return (__m256)((__v8su)__a | (__v8su)__b);\n" |
| 8849 | "}\n" |
| 8850 | "\n" |
| 8851 | "/// Performs a bitwise XOR of two 256-bit vectors of [4 x double].\n" |
| 8852 | "///\n" |
| 8853 | "/// \\headerfile <x86intrin.h>\n" |
| 8854 | "///\n" |
| 8855 | "/// This intrinsic corresponds to the <c> VXORPD </c> instruction.\n" |
| 8856 | "///\n" |
| 8857 | "/// \\param __a\n" |
| 8858 | "/// A 256-bit vector of [4 x double] containing one of the source operands.\n" |
| 8859 | "/// \\param __b\n" |
| 8860 | "/// A 256-bit vector of [4 x double] containing one of the source operands.\n" |
| 8861 | "/// \\returns A 256-bit vector of [4 x double] containing the bitwise XOR of the\n" |
| 8862 | "/// values between both operands.\n" |
| 8863 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 8864 | "_mm256_xor_pd(__m256d __a, __m256d __b)\n" |
| 8865 | "{\n" |
| 8866 | " return (__m256d)((__v4du)__a ^ (__v4du)__b);\n" |
| 8867 | "}\n" |
| 8868 | "\n" |
| 8869 | "/// Performs a bitwise XOR of two 256-bit vectors of [8 x float].\n" |
| 8870 | "///\n" |
| 8871 | "/// \\headerfile <x86intrin.h>\n" |
| 8872 | "///\n" |
| 8873 | "/// This intrinsic corresponds to the <c> VXORPS </c> instruction.\n" |
| 8874 | "///\n" |
| 8875 | "/// \\param __a\n" |
| 8876 | "/// A 256-bit vector of [8 x float] containing one of the source operands.\n" |
| 8877 | "/// \\param __b\n" |
| 8878 | "/// A 256-bit vector of [8 x float] containing one of the source operands.\n" |
| 8879 | "/// \\returns A 256-bit vector of [8 x float] containing the bitwise XOR of the\n" |
| 8880 | "/// values between both operands.\n" |
| 8881 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 8882 | "_mm256_xor_ps(__m256 __a, __m256 __b)\n" |
| 8883 | "{\n" |
| 8884 | " return (__m256)((__v8su)__a ^ (__v8su)__b);\n" |
| 8885 | "}\n" |
| 8886 | "\n" |
| 8887 | "/* Horizontal arithmetic */\n" |
| 8888 | "/// Horizontally adds the adjacent pairs of values contained in two\n" |
| 8889 | "/// 256-bit vectors of [4 x double].\n" |
| 8890 | "///\n" |
| 8891 | "/// \\headerfile <x86intrin.h>\n" |
| 8892 | "///\n" |
| 8893 | "/// This intrinsic corresponds to the <c> VHADDPD </c> instruction.\n" |
| 8894 | "///\n" |
| 8895 | "/// \\param __a\n" |
| 8896 | "/// A 256-bit vector of [4 x double] containing one of the source operands.\n" |
| 8897 | "/// The horizontal sums of the values are returned in the even-indexed\n" |
| 8898 | "/// elements of a vector of [4 x double].\n" |
| 8899 | "/// \\param __b\n" |
| 8900 | "/// A 256-bit vector of [4 x double] containing one of the source operands.\n" |
| 8901 | "/// The horizontal sums of the values are returned in the odd-indexed\n" |
| 8902 | "/// elements of a vector of [4 x double].\n" |
| 8903 | "/// \\returns A 256-bit vector of [4 x double] containing the horizontal sums of\n" |
| 8904 | "/// both operands.\n" |
| 8905 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 8906 | "_mm256_hadd_pd(__m256d __a, __m256d __b)\n" |
| 8907 | "{\n" |
| 8908 | " return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b);\n" |
| 8909 | "}\n" |
| 8910 | "\n" |
| 8911 | "/// Horizontally adds the adjacent pairs of values contained in two\n" |
| 8912 | "/// 256-bit vectors of [8 x float].\n" |
| 8913 | "///\n" |
| 8914 | "/// \\headerfile <x86intrin.h>\n" |
| 8915 | "///\n" |
| 8916 | "/// This intrinsic corresponds to the <c> VHADDPS </c> instruction.\n" |
| 8917 | "///\n" |
| 8918 | "/// \\param __a\n" |
| 8919 | "/// A 256-bit vector of [8 x float] containing one of the source operands.\n" |
| 8920 | "/// The horizontal sums of the values are returned in the elements with\n" |
| 8921 | "/// index 0, 1, 4, 5 of a vector of [8 x float].\n" |
| 8922 | "/// \\param __b\n" |
| 8923 | "/// A 256-bit vector of [8 x float] containing one of the source operands.\n" |
| 8924 | "/// The horizontal sums of the values are returned in the elements with\n" |
| 8925 | "/// index 2, 3, 6, 7 of a vector of [8 x float].\n" |
| 8926 | "/// \\returns A 256-bit vector of [8 x float] containing the horizontal sums of\n" |
| 8927 | "/// both operands.\n" |
| 8928 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 8929 | "_mm256_hadd_ps(__m256 __a, __m256 __b)\n" |
| 8930 | "{\n" |
| 8931 | " return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b);\n" |
| 8932 | "}\n" |
| 8933 | "\n" |
| 8934 | "/// Horizontally subtracts the adjacent pairs of values contained in two\n" |
| 8935 | "/// 256-bit vectors of [4 x double].\n" |
| 8936 | "///\n" |
| 8937 | "/// \\headerfile <x86intrin.h>\n" |
| 8938 | "///\n" |
| 8939 | "/// This intrinsic corresponds to the <c> VHSUBPD </c> instruction.\n" |
| 8940 | "///\n" |
| 8941 | "/// \\param __a\n" |
| 8942 | "/// A 256-bit vector of [4 x double] containing one of the source operands.\n" |
| 8943 | "/// The horizontal differences between the values are returned in the\n" |
| 8944 | "/// even-indexed elements of a vector of [4 x double].\n" |
| 8945 | "/// \\param __b\n" |
| 8946 | "/// A 256-bit vector of [4 x double] containing one of the source operands.\n" |
| 8947 | "/// The horizontal differences between the values are returned in the\n" |
| 8948 | "/// odd-indexed elements of a vector of [4 x double].\n" |
| 8949 | "/// \\returns A 256-bit vector of [4 x double] containing the horizontal\n" |
| 8950 | "/// differences of both operands.\n" |
| 8951 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 8952 | "_mm256_hsub_pd(__m256d __a, __m256d __b)\n" |
| 8953 | "{\n" |
| 8954 | " return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b);\n" |
| 8955 | "}\n" |
| 8956 | "\n" |
| 8957 | "/// Horizontally subtracts the adjacent pairs of values contained in two\n" |
| 8958 | "/// 256-bit vectors of [8 x float].\n" |
| 8959 | "///\n" |
| 8960 | "/// \\headerfile <x86intrin.h>\n" |
| 8961 | "///\n" |
| 8962 | "/// This intrinsic corresponds to the <c> VHSUBPS </c> instruction.\n" |
| 8963 | "///\n" |
| 8964 | "/// \\param __a\n" |
| 8965 | "/// A 256-bit vector of [8 x float] containing one of the source operands.\n" |
| 8966 | "/// The horizontal differences between the values are returned in the\n" |
| 8967 | "/// elements with index 0, 1, 4, 5 of a vector of [8 x float].\n" |
| 8968 | "/// \\param __b\n" |
| 8969 | "/// A 256-bit vector of [8 x float] containing one of the source operands.\n" |
| 8970 | "/// The horizontal differences between the values are returned in the\n" |
| 8971 | "/// elements with index 2, 3, 6, 7 of a vector of [8 x float].\n" |
| 8972 | "/// \\returns A 256-bit vector of [8 x float] containing the horizontal\n" |
| 8973 | "/// differences of both operands.\n" |
| 8974 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 8975 | "_mm256_hsub_ps(__m256 __a, __m256 __b)\n" |
| 8976 | "{\n" |
| 8977 | " return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b);\n" |
| 8978 | "}\n" |
| 8979 | "\n" |
| 8980 | "/* Vector permutations */\n" |
| 8981 | "/// Copies the values in a 128-bit vector of [2 x double] as specified\n" |
| 8982 | "/// by the 128-bit integer vector operand.\n" |
| 8983 | "///\n" |
| 8984 | "/// \\headerfile <x86intrin.h>\n" |
| 8985 | "///\n" |
| 8986 | "/// This intrinsic corresponds to the <c> VPERMILPD </c> instruction.\n" |
| 8987 | "///\n" |
| 8988 | "/// \\param __a\n" |
| 8989 | "/// A 128-bit vector of [2 x double].\n" |
| 8990 | "/// \\param __c\n" |
| 8991 | "/// A 128-bit integer vector operand specifying how the values are to be\n" |
| 8992 | "/// copied. \\n\n" |
| 8993 | "/// Bit [1]: \\n\n" |
| 8994 | "/// 0: Bits [63:0] of the source are copied to bits [63:0] of the returned\n" |
| 8995 | "/// vector. \\n\n" |
| 8996 | "/// 1: Bits [127:64] of the source are copied to bits [63:0] of the\n" |
| 8997 | "/// returned vector. \\n\n" |
| 8998 | "/// Bit [65]: \\n\n" |
| 8999 | "/// 0: Bits [63:0] of the source are copied to bits [127:64] of the\n" |
| 9000 | "/// returned vector. \\n\n" |
| 9001 | "/// 1: Bits [127:64] of the source are copied to bits [127:64] of the\n" |
| 9002 | "/// returned vector.\n" |
| 9003 | "/// \\returns A 128-bit vector of [2 x double] containing the copied values.\n" |
| 9004 | "static __inline __m128d __DEFAULT_FN_ATTRS128\n" |
| 9005 | "_mm_permutevar_pd(__m128d __a, __m128i __c)\n" |
| 9006 | "{\n" |
| 9007 | " return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c);\n" |
| 9008 | "}\n" |
| 9009 | "\n" |
| 9010 | "/// Copies the values in a 256-bit vector of [4 x double] as specified\n" |
| 9011 | "/// by the 256-bit integer vector operand.\n" |
| 9012 | "///\n" |
| 9013 | "/// \\headerfile <x86intrin.h>\n" |
| 9014 | "///\n" |
| 9015 | "/// This intrinsic corresponds to the <c> VPERMILPD </c> instruction.\n" |
| 9016 | "///\n" |
| 9017 | "/// \\param __a\n" |
| 9018 | "/// A 256-bit vector of [4 x double].\n" |
| 9019 | "/// \\param __c\n" |
| 9020 | "/// A 256-bit integer vector operand specifying how the values are to be\n" |
| 9021 | "/// copied. \\n\n" |
| 9022 | "/// Bit [1]: \\n\n" |
| 9023 | "/// 0: Bits [63:0] of the source are copied to bits [63:0] of the returned\n" |
| 9024 | "/// vector. \\n\n" |
| 9025 | "/// 1: Bits [127:64] of the source are copied to bits [63:0] of the\n" |
| 9026 | "/// returned vector. \\n\n" |
| 9027 | "/// Bit [65]: \\n\n" |
| 9028 | "/// 0: Bits [63:0] of the source are copied to bits [127:64] of the\n" |
| 9029 | "/// returned vector. \\n\n" |
| 9030 | "/// 1: Bits [127:64] of the source are copied to bits [127:64] of the\n" |
| 9031 | "/// returned vector. \\n\n" |
| 9032 | "/// Bit [129]: \\n\n" |
| 9033 | "/// 0: Bits [191:128] of the source are copied to bits [191:128] of the\n" |
| 9034 | "/// returned vector. \\n\n" |
| 9035 | "/// 1: Bits [255:192] of the source are copied to bits [191:128] of the\n" |
| 9036 | "/// returned vector. \\n\n" |
| 9037 | "/// Bit [193]: \\n\n" |
| 9038 | "/// 0: Bits [191:128] of the source are copied to bits [255:192] of the\n" |
| 9039 | "/// returned vector. \\n\n" |
| 9040 | "/// 1: Bits [255:192] of the source are copied to bits [255:192] of the\n" |
| 9041 | "/// returned vector.\n" |
| 9042 | "/// \\returns A 256-bit vector of [4 x double] containing the copied values.\n" |
| 9043 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 9044 | "_mm256_permutevar_pd(__m256d __a, __m256i __c)\n" |
| 9045 | "{\n" |
| 9046 | " return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c);\n" |
| 9047 | "}\n" |
| 9048 | "\n" |
| 9049 | "/// Copies the values stored in a 128-bit vector of [4 x float] as\n" |
| 9050 | "/// specified by the 128-bit integer vector operand.\n" |
| 9051 | "/// \\headerfile <x86intrin.h>\n" |
| 9052 | "///\n" |
| 9053 | "/// This intrinsic corresponds to the <c> VPERMILPS </c> instruction.\n" |
| 9054 | "///\n" |
| 9055 | "/// \\param __a\n" |
| 9056 | "/// A 128-bit vector of [4 x float].\n" |
| 9057 | "/// \\param __c\n" |
| 9058 | "/// A 128-bit integer vector operand specifying how the values are to be\n" |
| 9059 | "/// copied. \\n\n" |
| 9060 | "/// Bits [1:0]: \\n\n" |
| 9061 | "/// 00: Bits [31:0] of the source are copied to bits [31:0] of the\n" |
| 9062 | "/// returned vector. \\n\n" |
| 9063 | "/// 01: Bits [63:32] of the source are copied to bits [31:0] of the\n" |
| 9064 | "/// returned vector. \\n\n" |
| 9065 | "/// 10: Bits [95:64] of the source are copied to bits [31:0] of the\n" |
| 9066 | "/// returned vector. \\n\n" |
| 9067 | "/// 11: Bits [127:96] of the source are copied to bits [31:0] of the\n" |
| 9068 | "/// returned vector. \\n\n" |
| 9069 | "/// Bits [33:32]: \\n\n" |
| 9070 | "/// 00: Bits [31:0] of the source are copied to bits [63:32] of the\n" |
| 9071 | "/// returned vector. \\n\n" |
| 9072 | "/// 01: Bits [63:32] of the source are copied to bits [63:32] of the\n" |
| 9073 | "/// returned vector. \\n\n" |
| 9074 | "/// 10: Bits [95:64] of the source are copied to bits [63:32] of the\n" |
| 9075 | "/// returned vector. \\n\n" |
| 9076 | "/// 11: Bits [127:96] of the source are copied to bits [63:32] of the\n" |
| 9077 | "/// returned vector. \\n\n" |
| 9078 | "/// Bits [65:64]: \\n\n" |
| 9079 | "/// 00: Bits [31:0] of the source are copied to bits [95:64] of the\n" |
| 9080 | "/// returned vector. \\n\n" |
| 9081 | "/// 01: Bits [63:32] of the source are copied to bits [95:64] of the\n" |
| 9082 | "/// returned vector. \\n\n" |
| 9083 | "/// 10: Bits [95:64] of the source are copied to bits [95:64] of the\n" |
| 9084 | "/// returned vector. \\n\n" |
| 9085 | "/// 11: Bits [127:96] of the source are copied to bits [95:64] of the\n" |
| 9086 | "/// returned vector. \\n\n" |
| 9087 | "/// Bits [97:96]: \\n\n" |
| 9088 | "/// 00: Bits [31:0] of the source are copied to bits [127:96] of the\n" |
| 9089 | "/// returned vector. \\n\n" |
| 9090 | "/// 01: Bits [63:32] of the source are copied to bits [127:96] of the\n" |
| 9091 | "/// returned vector. \\n\n" |
| 9092 | "/// 10: Bits [95:64] of the source are copied to bits [127:96] of the\n" |
| 9093 | "/// returned vector. \\n\n" |
| 9094 | "/// 11: Bits [127:96] of the source are copied to bits [127:96] of the\n" |
| 9095 | "/// returned vector.\n" |
| 9096 | "/// \\returns A 128-bit vector of [4 x float] containing the copied values.\n" |
| 9097 | "static __inline __m128 __DEFAULT_FN_ATTRS128\n" |
| 9098 | "_mm_permutevar_ps(__m128 __a, __m128i __c)\n" |
| 9099 | "{\n" |
| 9100 | " return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c);\n" |
| 9101 | "}\n" |
| 9102 | "\n" |
| 9103 | "/// Copies the values stored in a 256-bit vector of [8 x float] as\n" |
| 9104 | "/// specified by the 256-bit integer vector operand.\n" |
| 9105 | "///\n" |
| 9106 | "/// \\headerfile <x86intrin.h>\n" |
| 9107 | "///\n" |
| 9108 | "/// This intrinsic corresponds to the <c> VPERMILPS </c> instruction.\n" |
| 9109 | "///\n" |
| 9110 | "/// \\param __a\n" |
| 9111 | "/// A 256-bit vector of [8 x float].\n" |
| 9112 | "/// \\param __c\n" |
| 9113 | "/// A 256-bit integer vector operand specifying how the values are to be\n" |
| 9114 | "/// copied. \\n\n" |
| 9115 | "/// Bits [1:0]: \\n\n" |
| 9116 | "/// 00: Bits [31:0] of the source are copied to bits [31:0] of the\n" |
| 9117 | "/// returned vector. \\n\n" |
| 9118 | "/// 01: Bits [63:32] of the source are copied to bits [31:0] of the\n" |
| 9119 | "/// returned vector. \\n\n" |
| 9120 | "/// 10: Bits [95:64] of the source are copied to bits [31:0] of the\n" |
| 9121 | "/// returned vector. \\n\n" |
| 9122 | "/// 11: Bits [127:96] of the source are copied to bits [31:0] of the\n" |
| 9123 | "/// returned vector. \\n\n" |
| 9124 | "/// Bits [33:32]: \\n\n" |
| 9125 | "/// 00: Bits [31:0] of the source are copied to bits [63:32] of the\n" |
| 9126 | "/// returned vector. \\n\n" |
| 9127 | "/// 01: Bits [63:32] of the source are copied to bits [63:32] of the\n" |
| 9128 | "/// returned vector. \\n\n" |
| 9129 | "/// 10: Bits [95:64] of the source are copied to bits [63:32] of the\n" |
| 9130 | "/// returned vector. \\n\n" |
| 9131 | "/// 11: Bits [127:96] of the source are copied to bits [63:32] of the\n" |
| 9132 | "/// returned vector. \\n\n" |
| 9133 | "/// Bits [65:64]: \\n\n" |
| 9134 | "/// 00: Bits [31:0] of the source are copied to bits [95:64] of the\n" |
| 9135 | "/// returned vector. \\n\n" |
| 9136 | "/// 01: Bits [63:32] of the source are copied to bits [95:64] of the\n" |
| 9137 | "/// returned vector. \\n\n" |
| 9138 | "/// 10: Bits [95:64] of the source are copied to bits [95:64] of the\n" |
| 9139 | "/// returned vector. \\n\n" |
| 9140 | "/// 11: Bits [127:96] of the source are copied to bits [95:64] of the\n" |
| 9141 | "/// returned vector. \\n\n" |
| 9142 | "/// Bits [97:96]: \\n\n" |
| 9143 | "/// 00: Bits [31:0] of the source are copied to bits [127:96] of the\n" |
| 9144 | "/// returned vector. \\n\n" |
| 9145 | "/// 01: Bits [63:32] of the source are copied to bits [127:96] of the\n" |
| 9146 | "/// returned vector. \\n\n" |
| 9147 | "/// 10: Bits [95:64] of the source are copied to bits [127:96] of the\n" |
| 9148 | "/// returned vector. \\n\n" |
| 9149 | "/// 11: Bits [127:96] of the source are copied to bits [127:96] of the\n" |
| 9150 | "/// returned vector. \\n\n" |
| 9151 | "/// Bits [129:128]: \\n\n" |
| 9152 | "/// 00: Bits [159:128] of the source are copied to bits [159:128] of the\n" |
| 9153 | "/// returned vector. \\n\n" |
| 9154 | "/// 01: Bits [191:160] of the source are copied to bits [159:128] of the\n" |
| 9155 | "/// returned vector. \\n\n" |
| 9156 | "/// 10: Bits [223:192] of the source are copied to bits [159:128] of the\n" |
| 9157 | "/// returned vector. \\n\n" |
| 9158 | "/// 11: Bits [255:224] of the source are copied to bits [159:128] of the\n" |
| 9159 | "/// returned vector. \\n\n" |
| 9160 | "/// Bits [161:160]: \\n\n" |
| 9161 | "/// 00: Bits [159:128] of the source are copied to bits [191:160] of the\n" |
| 9162 | "/// returned vector. \\n\n" |
| 9163 | "/// 01: Bits [191:160] of the source are copied to bits [191:160] of the\n" |
| 9164 | "/// returned vector. \\n\n" |
| 9165 | "/// 10: Bits [223:192] of the source are copied to bits [191:160] of the\n" |
| 9166 | "/// returned vector. \\n\n" |
| 9167 | "/// 11: Bits [255:224] of the source are copied to bits [191:160] of the\n" |
| 9168 | "/// returned vector. \\n\n" |
| 9169 | "/// Bits [193:192]: \\n\n" |
| 9170 | "/// 00: Bits [159:128] of the source are copied to bits [223:192] of the\n" |
| 9171 | "/// returned vector. \\n\n" |
| 9172 | "/// 01: Bits [191:160] of the source are copied to bits [223:192] of the\n" |
| 9173 | "/// returned vector. \\n\n" |
| 9174 | "/// 10: Bits [223:192] of the source are copied to bits [223:192] of the\n" |
| 9175 | "/// returned vector. \\n\n" |
| 9176 | "/// 11: Bits [255:224] of the source are copied to bits [223:192] of the\n" |
| 9177 | "/// returned vector. \\n\n" |
| 9178 | "/// Bits [225:224]: \\n\n" |
| 9179 | "/// 00: Bits [159:128] of the source are copied to bits [255:224] of the\n" |
| 9180 | "/// returned vector. \\n\n" |
| 9181 | "/// 01: Bits [191:160] of the source are copied to bits [255:224] of the\n" |
| 9182 | "/// returned vector. \\n\n" |
| 9183 | "/// 10: Bits [223:192] of the source are copied to bits [255:224] of the\n" |
| 9184 | "/// returned vector. \\n\n" |
| 9185 | "/// 11: Bits [255:224] of the source are copied to bits [255:224] of the\n" |
| 9186 | "/// returned vector.\n" |
| 9187 | "/// \\returns A 256-bit vector of [8 x float] containing the copied values.\n" |
| 9188 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 9189 | "_mm256_permutevar_ps(__m256 __a, __m256i __c)\n" |
| 9190 | "{\n" |
| 9191 | " return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c);\n" |
| 9192 | "}\n" |
| 9193 | "\n" |
| 9194 | "/// Copies the values in a 128-bit vector of [2 x double] as specified\n" |
| 9195 | "/// by the immediate integer operand.\n" |
| 9196 | "///\n" |
| 9197 | "/// \\headerfile <x86intrin.h>\n" |
| 9198 | "///\n" |
| 9199 | "/// \\code\n" |
| 9200 | "/// __m128d _mm_permute_pd(__m128d A, const int C);\n" |
| 9201 | "/// \\endcode\n" |
| 9202 | "///\n" |
| 9203 | "/// This intrinsic corresponds to the <c> VPERMILPD </c> instruction.\n" |
| 9204 | "///\n" |
| 9205 | "/// \\param A\n" |
| 9206 | "/// A 128-bit vector of [2 x double].\n" |
| 9207 | "/// \\param C\n" |
| 9208 | "/// An immediate integer operand specifying how the values are to be\n" |
| 9209 | "/// copied. \\n\n" |
| 9210 | "/// Bit [0]: \\n\n" |
| 9211 | "/// 0: Bits [63:0] of the source are copied to bits [63:0] of the returned\n" |
| 9212 | "/// vector. \\n\n" |
| 9213 | "/// 1: Bits [127:64] of the source are copied to bits [63:0] of the\n" |
| 9214 | "/// returned vector. \\n\n" |
| 9215 | "/// Bit [1]: \\n\n" |
| 9216 | "/// 0: Bits [63:0] of the source are copied to bits [127:64] of the\n" |
| 9217 | "/// returned vector. \\n\n" |
| 9218 | "/// 1: Bits [127:64] of the source are copied to bits [127:64] of the\n" |
| 9219 | "/// returned vector.\n" |
| 9220 | "/// \\returns A 128-bit vector of [2 x double] containing the copied values.\n" |
| 9221 | "#define _mm_permute_pd(A, C) \\\n" |
| 9222 | " (__m128d)__builtin_ia32_vpermilpd((__v2df)(__m128d)(A), (int)(C))\n" |
| 9223 | "\n" |
| 9224 | "/// Copies the values in a 256-bit vector of [4 x double] as specified by\n" |
| 9225 | "/// the immediate integer operand.\n" |
| 9226 | "///\n" |
| 9227 | "/// \\headerfile <x86intrin.h>\n" |
| 9228 | "///\n" |
| 9229 | "/// \\code\n" |
| 9230 | "/// __m256d _mm256_permute_pd(__m256d A, const int C);\n" |
| 9231 | "/// \\endcode\n" |
| 9232 | "///\n" |
| 9233 | "/// This intrinsic corresponds to the <c> VPERMILPD </c> instruction.\n" |
| 9234 | "///\n" |
| 9235 | "/// \\param A\n" |
| 9236 | "/// A 256-bit vector of [4 x double].\n" |
| 9237 | "/// \\param C\n" |
| 9238 | "/// An immediate integer operand specifying how the values are to be\n" |
| 9239 | "/// copied. \\n\n" |
| 9240 | "/// Bit [0]: \\n\n" |
| 9241 | "/// 0: Bits [63:0] of the source are copied to bits [63:0] of the returned\n" |
| 9242 | "/// vector. \\n\n" |
| 9243 | "/// 1: Bits [127:64] of the source are copied to bits [63:0] of the\n" |
| 9244 | "/// returned vector. \\n\n" |
| 9245 | "/// Bit [1]: \\n\n" |
| 9246 | "/// 0: Bits [63:0] of the source are copied to bits [127:64] of the\n" |
| 9247 | "/// returned vector. \\n\n" |
| 9248 | "/// 1: Bits [127:64] of the source are copied to bits [127:64] of the\n" |
| 9249 | "/// returned vector. \\n\n" |
| 9250 | "/// Bit [2]: \\n\n" |
| 9251 | "/// 0: Bits [191:128] of the source are copied to bits [191:128] of the\n" |
| 9252 | "/// returned vector. \\n\n" |
| 9253 | "/// 1: Bits [255:192] of the source are copied to bits [191:128] of the\n" |
| 9254 | "/// returned vector. \\n\n" |
| 9255 | "/// Bit [3]: \\n\n" |
| 9256 | "/// 0: Bits [191:128] of the source are copied to bits [255:192] of the\n" |
| 9257 | "/// returned vector. \\n\n" |
| 9258 | "/// 1: Bits [255:192] of the source are copied to bits [255:192] of the\n" |
| 9259 | "/// returned vector.\n" |
| 9260 | "/// \\returns A 256-bit vector of [4 x double] containing the copied values.\n" |
| 9261 | "#define _mm256_permute_pd(A, C) \\\n" |
| 9262 | " (__m256d)__builtin_ia32_vpermilpd256((__v4df)(__m256d)(A), (int)(C))\n" |
| 9263 | "\n" |
| 9264 | "/// Copies the values in a 128-bit vector of [4 x float] as specified by\n" |
| 9265 | "/// the immediate integer operand.\n" |
| 9266 | "///\n" |
| 9267 | "/// \\headerfile <x86intrin.h>\n" |
| 9268 | "///\n" |
| 9269 | "/// \\code\n" |
| 9270 | "/// __m128 _mm_permute_ps(__m128 A, const int C);\n" |
| 9271 | "/// \\endcode\n" |
| 9272 | "///\n" |
| 9273 | "/// This intrinsic corresponds to the <c> VPERMILPS </c> instruction.\n" |
| 9274 | "///\n" |
| 9275 | "/// \\param A\n" |
| 9276 | "/// A 128-bit vector of [4 x float].\n" |
| 9277 | "/// \\param C\n" |
| 9278 | "/// An immediate integer operand specifying how the values are to be\n" |
| 9279 | "/// copied. \\n\n" |
| 9280 | "/// Bits [1:0]: \\n\n" |
| 9281 | "/// 00: Bits [31:0] of the source are copied to bits [31:0] of the\n" |
| 9282 | "/// returned vector. \\n\n" |
| 9283 | "/// 01: Bits [63:32] of the source are copied to bits [31:0] of the\n" |
| 9284 | "/// returned vector. \\n\n" |
| 9285 | "/// 10: Bits [95:64] of the source are copied to bits [31:0] of the\n" |
| 9286 | "/// returned vector. \\n\n" |
| 9287 | "/// 11: Bits [127:96] of the source are copied to bits [31:0] of the\n" |
| 9288 | "/// returned vector. \\n\n" |
| 9289 | "/// Bits [3:2]: \\n\n" |
| 9290 | "/// 00: Bits [31:0] of the source are copied to bits [63:32] of the\n" |
| 9291 | "/// returned vector. \\n\n" |
| 9292 | "/// 01: Bits [63:32] of the source are copied to bits [63:32] of the\n" |
| 9293 | "/// returned vector. \\n\n" |
| 9294 | "/// 10: Bits [95:64] of the source are copied to bits [63:32] of the\n" |
| 9295 | "/// returned vector. \\n\n" |
| 9296 | "/// 11: Bits [127:96] of the source are copied to bits [63:32] of the\n" |
| 9297 | "/// returned vector. \\n\n" |
| 9298 | "/// Bits [5:4]: \\n\n" |
| 9299 | "/// 00: Bits [31:0] of the source are copied to bits [95:64] of the\n" |
| 9300 | "/// returned vector. \\n\n" |
| 9301 | "/// 01: Bits [63:32] of the source are copied to bits [95:64] of the\n" |
| 9302 | "/// returned vector. \\n\n" |
| 9303 | "/// 10: Bits [95:64] of the source are copied to bits [95:64] of the\n" |
| 9304 | "/// returned vector. \\n\n" |
| 9305 | "/// 11: Bits [127:96] of the source are copied to bits [95:64] of the\n" |
| 9306 | "/// returned vector. \\n\n" |
| 9307 | "/// Bits [7:6]: \\n\n" |
| 9308 | "/// 00: Bits [31:0] of the source are copied to bits [127:96] of the\n" |
| 9309 | "/// returned vector. \\n\n" |
| 9310 | "/// 01: Bits [63:32] of the source are copied to bits [127:96] of the\n" |
| 9311 | "/// returned vector. \\n\n" |
| 9312 | "/// 10: Bits [95:64] of the source are copied to bits [127:96] of the\n" |
| 9313 | "/// returned vector. \\n\n" |
| 9314 | "/// 11: Bits [127:96] of the source are copied to bits [127:96] of the\n" |
| 9315 | "/// returned vector.\n" |
| 9316 | "/// \\returns A 128-bit vector of [4 x float] containing the copied values.\n" |
| 9317 | "#define _mm_permute_ps(A, C) \\\n" |
| 9318 | " (__m128)__builtin_ia32_vpermilps((__v4sf)(__m128)(A), (int)(C))\n" |
| 9319 | "\n" |
| 9320 | "/// Copies the values in a 256-bit vector of [8 x float] as specified by\n" |
| 9321 | "/// the immediate integer operand.\n" |
| 9322 | "///\n" |
| 9323 | "/// \\headerfile <x86intrin.h>\n" |
| 9324 | "///\n" |
| 9325 | "/// \\code\n" |
| 9326 | "/// __m256 _mm256_permute_ps(__m256 A, const int C);\n" |
| 9327 | "/// \\endcode\n" |
| 9328 | "///\n" |
| 9329 | "/// This intrinsic corresponds to the <c> VPERMILPS </c> instruction.\n" |
| 9330 | "///\n" |
| 9331 | "/// \\param A\n" |
| 9332 | "/// A 256-bit vector of [8 x float].\n" |
| 9333 | "/// \\param C\n" |
| 9334 | "/// An immediate integer operand specifying how the values are to be\n" |
| 9335 | "/// copied. \\n\n" |
| 9336 | "/// Bits [1:0]: \\n\n" |
| 9337 | "/// 00: Bits [31:0] of the source are copied to bits [31:0] of the\n" |
| 9338 | "/// returned vector. \\n\n" |
| 9339 | "/// 01: Bits [63:32] of the source are copied to bits [31:0] of the\n" |
| 9340 | "/// returned vector. \\n\n" |
| 9341 | "/// 10: Bits [95:64] of the source are copied to bits [31:0] of the\n" |
| 9342 | "/// returned vector. \\n\n" |
| 9343 | "/// 11: Bits [127:96] of the source are copied to bits [31:0] of the\n" |
| 9344 | "/// returned vector. \\n\n" |
| 9345 | "/// Bits [3:2]: \\n\n" |
| 9346 | "/// 00: Bits [31:0] of the source are copied to bits [63:32] of the\n" |
| 9347 | "/// returned vector. \\n\n" |
| 9348 | "/// 01: Bits [63:32] of the source are copied to bits [63:32] of the\n" |
| 9349 | "/// returned vector. \\n\n" |
| 9350 | "/// 10: Bits [95:64] of the source are copied to bits [63:32] of the\n" |
| 9351 | "/// returned vector. \\n\n" |
| 9352 | "/// 11: Bits [127:96] of the source are copied to bits [63:32] of the\n" |
| 9353 | "/// returned vector. \\n\n" |
| 9354 | "/// Bits [5:4]: \\n\n" |
| 9355 | "/// 00: Bits [31:0] of the source are copied to bits [95:64] of the\n" |
| 9356 | "/// returned vector. \\n\n" |
| 9357 | "/// 01: Bits [63:32] of the source are copied to bits [95:64] of the\n" |
| 9358 | "/// returned vector. \\n\n" |
| 9359 | "/// 10: Bits [95:64] of the source are copied to bits [95:64] of the\n" |
| 9360 | "/// returned vector. \\n\n" |
| 9361 | "/// 11: Bits [127:96] of the source are copied to bits [95:64] of the\n" |
| 9362 | "/// returned vector. \\n\n" |
| 9363 | "/// Bits [7:6]: \\n\n" |
| 9364 | "/// 00: Bits [31:0] of the source are copied to bits [127:96] of the\n" |
| 9365 | "/// returned vector. \\n\n" |
| 9366 | "/// 01: Bits [63:32] of the source are copied to bits [127:96] of the\n" |
| 9367 | "/// returned vector. \\n\n" |
| 9368 | "/// 10: Bits [95:64] of the source are copied to bits [127:96] of the\n" |
| 9369 | "/// returned vector. \\n\n" |
| 9370 | "/// 11: Bits [127:96] of the source are copied to bits [127:96] of the\n" |
| 9371 | "/// returned vector. \\n\n" |
| 9372 | "/// Bits [1:0]: \\n\n" |
| 9373 | "/// 00: Bits [159:128] of the source are copied to bits [159:128] of the\n" |
| 9374 | "/// returned vector. \\n\n" |
| 9375 | "/// 01: Bits [191:160] of the source are copied to bits [159:128] of the\n" |
| 9376 | "/// returned vector. \\n\n" |
| 9377 | "/// 10: Bits [223:192] of the source are copied to bits [159:128] of the\n" |
| 9378 | "/// returned vector. \\n\n" |
| 9379 | "/// 11: Bits [255:224] of the source are copied to bits [159:128] of the\n" |
| 9380 | "/// returned vector. \\n\n" |
| 9381 | "/// Bits [3:2]: \\n\n" |
| 9382 | "/// 00: Bits [159:128] of the source are copied to bits [191:160] of the\n" |
| 9383 | "/// returned vector. \\n\n" |
| 9384 | "/// 01: Bits [191:160] of the source are copied to bits [191:160] of the\n" |
| 9385 | "/// returned vector. \\n\n" |
| 9386 | "/// 10: Bits [223:192] of the source are copied to bits [191:160] of the\n" |
| 9387 | "/// returned vector. \\n\n" |
| 9388 | "/// 11: Bits [255:224] of the source are copied to bits [191:160] of the\n" |
| 9389 | "/// returned vector. \\n\n" |
| 9390 | "/// Bits [5:4]: \\n\n" |
| 9391 | "/// 00: Bits [159:128] of the source are copied to bits [223:192] of the\n" |
| 9392 | "/// returned vector. \\n\n" |
| 9393 | "/// 01: Bits [191:160] of the source are copied to bits [223:192] of the\n" |
| 9394 | "/// returned vector. \\n\n" |
| 9395 | "/// 10: Bits [223:192] of the source are copied to bits [223:192] of the\n" |
| 9396 | "/// returned vector. \\n\n" |
| 9397 | "/// 11: Bits [255:224] of the source are copied to bits [223:192] of the\n" |
| 9398 | "/// returned vector. \\n\n" |
| 9399 | "/// Bits [7:6]: \\n\n" |
| 9400 | "/// 00: Bits [159:128] of the source are copied to bits [255:224] of the\n" |
| 9401 | "/// returned vector. \\n\n" |
| 9402 | "/// 01: Bits [191:160] of the source are copied to bits [255:224] of the\n" |
| 9403 | "/// returned vector. \\n\n" |
| 9404 | "/// 10: Bits [223:192] of the source are copied to bits [255:224] of the\n" |
| 9405 | "/// returned vector. \\n\n" |
| 9406 | "/// 11: Bits [255:224] of the source are copied to bits [255:224] of the\n" |
| 9407 | "/// returned vector.\n" |
| 9408 | "/// \\returns A 256-bit vector of [8 x float] containing the copied values.\n" |
| 9409 | "#define _mm256_permute_ps(A, C) \\\n" |
| 9410 | " (__m256)__builtin_ia32_vpermilps256((__v8sf)(__m256)(A), (int)(C))\n" |
| 9411 | "\n" |
| 9412 | "/// Permutes 128-bit data values stored in two 256-bit vectors of\n" |
| 9413 | "/// [4 x double], as specified by the immediate integer operand.\n" |
| 9414 | "///\n" |
| 9415 | "/// \\headerfile <x86intrin.h>\n" |
| 9416 | "///\n" |
| 9417 | "/// \\code\n" |
| 9418 | "/// __m256d _mm256_permute2f128_pd(__m256d V1, __m256d V2, const int M);\n" |
| 9419 | "/// \\endcode\n" |
| 9420 | "///\n" |
| 9421 | "/// This intrinsic corresponds to the <c> VPERM2F128 </c> instruction.\n" |
| 9422 | "///\n" |
| 9423 | "/// \\param V1\n" |
| 9424 | "/// A 256-bit vector of [4 x double].\n" |
| 9425 | "/// \\param V2\n" |
| 9426 | "/// A 256-bit vector of [4 x double.\n" |
| 9427 | "/// \\param M\n" |
| 9428 | "/// An immediate integer operand specifying how the values are to be\n" |
| 9429 | "/// permuted. \\n\n" |
| 9430 | "/// Bits [1:0]: \\n\n" |
| 9431 | "/// 00: Bits [127:0] of operand \\a V1 are copied to bits [127:0] of the\n" |
| 9432 | "/// destination. \\n\n" |
| 9433 | "/// 01: Bits [255:128] of operand \\a V1 are copied to bits [127:0] of the\n" |
| 9434 | "/// destination. \\n\n" |
| 9435 | "/// 10: Bits [127:0] of operand \\a V2 are copied to bits [127:0] of the\n" |
| 9436 | "/// destination. \\n\n" |
| 9437 | "/// 11: Bits [255:128] of operand \\a V2 are copied to bits [127:0] of the\n" |
| 9438 | "/// destination. \\n\n" |
| 9439 | "/// Bits [5:4]: \\n\n" |
| 9440 | "/// 00: Bits [127:0] of operand \\a V1 are copied to bits [255:128] of the\n" |
| 9441 | "/// destination. \\n\n" |
| 9442 | "/// 01: Bits [255:128] of operand \\a V1 are copied to bits [255:128] of the\n" |
| 9443 | "/// destination. \\n\n" |
| 9444 | "/// 10: Bits [127:0] of operand \\a V2 are copied to bits [255:128] of the\n" |
| 9445 | "/// destination. \\n\n" |
| 9446 | "/// 11: Bits [255:128] of operand \\a V2 are copied to bits [255:128] of the\n" |
| 9447 | "/// destination.\n" |
| 9448 | "/// \\returns A 256-bit vector of [4 x double] containing the copied values.\n" |
| 9449 | "#define _mm256_permute2f128_pd(V1, V2, M) \\\n" |
| 9450 | " (__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)(__m256d)(V1), \\\n" |
| 9451 | " (__v4df)(__m256d)(V2), (int)(M))\n" |
| 9452 | "\n" |
| 9453 | "/// Permutes 128-bit data values stored in two 256-bit vectors of\n" |
| 9454 | "/// [8 x float], as specified by the immediate integer operand.\n" |
| 9455 | "///\n" |
| 9456 | "/// \\headerfile <x86intrin.h>\n" |
| 9457 | "///\n" |
| 9458 | "/// \\code\n" |
| 9459 | "/// __m256 _mm256_permute2f128_ps(__m256 V1, __m256 V2, const int M);\n" |
| 9460 | "/// \\endcode\n" |
| 9461 | "///\n" |
| 9462 | "/// This intrinsic corresponds to the <c> VPERM2F128 </c> instruction.\n" |
| 9463 | "///\n" |
| 9464 | "/// \\param V1\n" |
| 9465 | "/// A 256-bit vector of [8 x float].\n" |
| 9466 | "/// \\param V2\n" |
| 9467 | "/// A 256-bit vector of [8 x float].\n" |
| 9468 | "/// \\param M\n" |
| 9469 | "/// An immediate integer operand specifying how the values are to be\n" |
| 9470 | "/// permuted. \\n\n" |
| 9471 | "/// Bits [1:0]: \\n\n" |
| 9472 | "/// 00: Bits [127:0] of operand \\a V1 are copied to bits [127:0] of the\n" |
| 9473 | "/// destination. \\n\n" |
| 9474 | "/// 01: Bits [255:128] of operand \\a V1 are copied to bits [127:0] of the\n" |
| 9475 | "/// destination. \\n\n" |
| 9476 | "/// 10: Bits [127:0] of operand \\a V2 are copied to bits [127:0] of the\n" |
| 9477 | "/// destination. \\n\n" |
| 9478 | "/// 11: Bits [255:128] of operand \\a V2 are copied to bits [127:0] of the\n" |
| 9479 | "/// destination. \\n\n" |
| 9480 | "/// Bits [5:4]: \\n\n" |
| 9481 | "/// 00: Bits [127:0] of operand \\a V1 are copied to bits [255:128] of the\n" |
| 9482 | "/// destination. \\n\n" |
| 9483 | "/// 01: Bits [255:128] of operand \\a V1 are copied to bits [255:128] of the\n" |
| 9484 | "/// destination. \\n\n" |
| 9485 | "/// 10: Bits [127:0] of operand \\a V2 are copied to bits [255:128] of the\n" |
| 9486 | "/// destination. \\n\n" |
| 9487 | "/// 11: Bits [255:128] of operand \\a V2 are copied to bits [255:128] of the\n" |
| 9488 | "/// destination.\n" |
| 9489 | "/// \\returns A 256-bit vector of [8 x float] containing the copied values.\n" |
| 9490 | "#define _mm256_permute2f128_ps(V1, V2, M) \\\n" |
| 9491 | " (__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)(__m256)(V1), \\\n" |
| 9492 | " (__v8sf)(__m256)(V2), (int)(M))\n" |
| 9493 | "\n" |
| 9494 | "/// Permutes 128-bit data values stored in two 256-bit integer vectors,\n" |
| 9495 | "/// as specified by the immediate integer operand.\n" |
| 9496 | "///\n" |
| 9497 | "/// \\headerfile <x86intrin.h>\n" |
| 9498 | "///\n" |
| 9499 | "/// \\code\n" |
| 9500 | "/// __m256i _mm256_permute2f128_si256(__m256i V1, __m256i V2, const int M);\n" |
| 9501 | "/// \\endcode\n" |
| 9502 | "///\n" |
| 9503 | "/// This intrinsic corresponds to the <c> VPERM2F128 </c> instruction.\n" |
| 9504 | "///\n" |
| 9505 | "/// \\param V1\n" |
| 9506 | "/// A 256-bit integer vector.\n" |
| 9507 | "/// \\param V2\n" |
| 9508 | "/// A 256-bit integer vector.\n" |
| 9509 | "/// \\param M\n" |
| 9510 | "/// An immediate integer operand specifying how the values are to be copied.\n" |
| 9511 | "/// Bits [1:0]: \\n\n" |
| 9512 | "/// 00: Bits [127:0] of operand \\a V1 are copied to bits [127:0] of the\n" |
| 9513 | "/// destination. \\n\n" |
| 9514 | "/// 01: Bits [255:128] of operand \\a V1 are copied to bits [127:0] of the\n" |
| 9515 | "/// destination. \\n\n" |
| 9516 | "/// 10: Bits [127:0] of operand \\a V2 are copied to bits [127:0] of the\n" |
| 9517 | "/// destination. \\n\n" |
| 9518 | "/// 11: Bits [255:128] of operand \\a V2 are copied to bits [127:0] of the\n" |
| 9519 | "/// destination. \\n\n" |
| 9520 | "/// Bits [5:4]: \\n\n" |
| 9521 | "/// 00: Bits [127:0] of operand \\a V1 are copied to bits [255:128] of the\n" |
| 9522 | "/// destination. \\n\n" |
| 9523 | "/// 01: Bits [255:128] of operand \\a V1 are copied to bits [255:128] of the\n" |
| 9524 | "/// destination. \\n\n" |
| 9525 | "/// 10: Bits [127:0] of operand \\a V2 are copied to bits [255:128] of the\n" |
| 9526 | "/// destination. \\n\n" |
| 9527 | "/// 11: Bits [255:128] of operand \\a V2 are copied to bits [255:128] of the\n" |
| 9528 | "/// destination.\n" |
| 9529 | "/// \\returns A 256-bit integer vector containing the copied values.\n" |
| 9530 | "#define _mm256_permute2f128_si256(V1, V2, M) \\\n" |
| 9531 | " (__m256i)__builtin_ia32_vperm2f128_si256((__v8si)(__m256i)(V1), \\\n" |
| 9532 | " (__v8si)(__m256i)(V2), (int)(M))\n" |
| 9533 | "\n" |
| 9534 | "/* Vector Blend */\n" |
| 9535 | "/// Merges 64-bit double-precision data values stored in either of the\n" |
| 9536 | "/// two 256-bit vectors of [4 x double], as specified by the immediate\n" |
| 9537 | "/// integer operand.\n" |
| 9538 | "///\n" |
| 9539 | "/// \\headerfile <x86intrin.h>\n" |
| 9540 | "///\n" |
| 9541 | "/// \\code\n" |
| 9542 | "/// __m256d _mm256_blend_pd(__m256d V1, __m256d V2, const int M);\n" |
| 9543 | "/// \\endcode\n" |
| 9544 | "///\n" |
| 9545 | "/// This intrinsic corresponds to the <c> VBLENDPD </c> instruction.\n" |
| 9546 | "///\n" |
| 9547 | "/// \\param V1\n" |
| 9548 | "/// A 256-bit vector of [4 x double].\n" |
| 9549 | "/// \\param V2\n" |
| 9550 | "/// A 256-bit vector of [4 x double].\n" |
| 9551 | "/// \\param M\n" |
| 9552 | "/// An immediate integer operand, with mask bits [3:0] specifying how the\n" |
| 9553 | "/// values are to be copied. The position of the mask bit corresponds to the\n" |
| 9554 | "/// index of a copied value. When a mask bit is 0, the corresponding 64-bit\n" |
| 9555 | "/// element in operand \\a V1 is copied to the same position in the\n" |
| 9556 | "/// destination. When a mask bit is 1, the corresponding 64-bit element in\n" |
| 9557 | "/// operand \\a V2 is copied to the same position in the destination.\n" |
| 9558 | "/// \\returns A 256-bit vector of [4 x double] containing the copied values.\n" |
| 9559 | "#define _mm256_blend_pd(V1, V2, M) \\\n" |
| 9560 | " (__m256d)__builtin_ia32_blendpd256((__v4df)(__m256d)(V1), \\\n" |
| 9561 | " (__v4df)(__m256d)(V2), (int)(M))\n" |
| 9562 | "\n" |
| 9563 | "/// Merges 32-bit single-precision data values stored in either of the\n" |
| 9564 | "/// two 256-bit vectors of [8 x float], as specified by the immediate\n" |
| 9565 | "/// integer operand.\n" |
| 9566 | "///\n" |
| 9567 | "/// \\headerfile <x86intrin.h>\n" |
| 9568 | "///\n" |
| 9569 | "/// \\code\n" |
| 9570 | "/// __m256 _mm256_blend_ps(__m256 V1, __m256 V2, const int M);\n" |
| 9571 | "/// \\endcode\n" |
| 9572 | "///\n" |
| 9573 | "/// This intrinsic corresponds to the <c> VBLENDPS </c> instruction.\n" |
| 9574 | "///\n" |
| 9575 | "/// \\param V1\n" |
| 9576 | "/// A 256-bit vector of [8 x float].\n" |
| 9577 | "/// \\param V2\n" |
| 9578 | "/// A 256-bit vector of [8 x float].\n" |
| 9579 | "/// \\param M\n" |
| 9580 | "/// An immediate integer operand, with mask bits [7:0] specifying how the\n" |
| 9581 | "/// values are to be copied. The position of the mask bit corresponds to the\n" |
| 9582 | "/// index of a copied value. When a mask bit is 0, the corresponding 32-bit\n" |
| 9583 | "/// element in operand \\a V1 is copied to the same position in the\n" |
| 9584 | "/// destination. When a mask bit is 1, the corresponding 32-bit element in\n" |
| 9585 | "/// operand \\a V2 is copied to the same position in the destination.\n" |
| 9586 | "/// \\returns A 256-bit vector of [8 x float] containing the copied values.\n" |
| 9587 | "#define _mm256_blend_ps(V1, V2, M) \\\n" |
| 9588 | " (__m256)__builtin_ia32_blendps256((__v8sf)(__m256)(V1), \\\n" |
| 9589 | " (__v8sf)(__m256)(V2), (int)(M))\n" |
| 9590 | "\n" |
| 9591 | "/// Merges 64-bit double-precision data values stored in either of the\n" |
| 9592 | "/// two 256-bit vectors of [4 x double], as specified by the 256-bit vector\n" |
| 9593 | "/// operand.\n" |
| 9594 | "///\n" |
| 9595 | "/// \\headerfile <x86intrin.h>\n" |
| 9596 | "///\n" |
| 9597 | "/// This intrinsic corresponds to the <c> VBLENDVPD </c> instruction.\n" |
| 9598 | "///\n" |
| 9599 | "/// \\param __a\n" |
| 9600 | "/// A 256-bit vector of [4 x double].\n" |
| 9601 | "/// \\param __b\n" |
| 9602 | "/// A 256-bit vector of [4 x double].\n" |
| 9603 | "/// \\param __c\n" |
| 9604 | "/// A 256-bit vector operand, with mask bits 255, 191, 127, and 63 specifying\n" |
| 9605 | "/// how the values are to be copied. The position of the mask bit corresponds\n" |
| 9606 | "/// to the most significant bit of a copied value. When a mask bit is 0, the\n" |
| 9607 | "/// corresponding 64-bit element in operand \\a __a is copied to the same\n" |
| 9608 | "/// position in the destination. When a mask bit is 1, the corresponding\n" |
| 9609 | "/// 64-bit element in operand \\a __b is copied to the same position in the\n" |
| 9610 | "/// destination.\n" |
| 9611 | "/// \\returns A 256-bit vector of [4 x double] containing the copied values.\n" |
| 9612 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 9613 | "_mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c)\n" |
| 9614 | "{\n" |
| 9615 | " return (__m256d)__builtin_ia32_blendvpd256(\n" |
| 9616 | " (__v4df)__a, (__v4df)__b, (__v4df)__c);\n" |
| 9617 | "}\n" |
| 9618 | "\n" |
| 9619 | "/// Merges 32-bit single-precision data values stored in either of the\n" |
| 9620 | "/// two 256-bit vectors of [8 x float], as specified by the 256-bit vector\n" |
| 9621 | "/// operand.\n" |
| 9622 | "///\n" |
| 9623 | "/// \\headerfile <x86intrin.h>\n" |
| 9624 | "///\n" |
| 9625 | "/// This intrinsic corresponds to the <c> VBLENDVPS </c> instruction.\n" |
| 9626 | "///\n" |
| 9627 | "/// \\param __a\n" |
| 9628 | "/// A 256-bit vector of [8 x float].\n" |
| 9629 | "/// \\param __b\n" |
| 9630 | "/// A 256-bit vector of [8 x float].\n" |
| 9631 | "/// \\param __c\n" |
| 9632 | "/// A 256-bit vector operand, with mask bits 255, 223, 191, 159, 127, 95, 63,\n" |
| 9633 | "/// and 31 specifying how the values are to be copied. The position of the\n" |
| 9634 | "/// mask bit corresponds to the most significant bit of a copied value. When\n" |
| 9635 | "/// a mask bit is 0, the corresponding 32-bit element in operand \\a __a is\n" |
| 9636 | "/// copied to the same position in the destination. When a mask bit is 1, the\n" |
| 9637 | "/// corresponding 32-bit element in operand \\a __b is copied to the same\n" |
| 9638 | "/// position in the destination.\n" |
| 9639 | "/// \\returns A 256-bit vector of [8 x float] containing the copied values.\n" |
| 9640 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 9641 | "_mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)\n" |
| 9642 | "{\n" |
| 9643 | " return (__m256)__builtin_ia32_blendvps256(\n" |
| 9644 | " (__v8sf)__a, (__v8sf)__b, (__v8sf)__c);\n" |
| 9645 | "}\n" |
| 9646 | "\n" |
| 9647 | "/* Vector Dot Product */\n" |
| 9648 | "/// Computes two dot products in parallel, using the lower and upper\n" |
| 9649 | "/// halves of two [8 x float] vectors as input to the two computations, and\n" |
| 9650 | "/// returning the two dot products in the lower and upper halves of the\n" |
| 9651 | "/// [8 x float] result.\n" |
| 9652 | "///\n" |
| 9653 | "/// The immediate integer operand controls which input elements will\n" |
| 9654 | "/// contribute to the dot product, and where the final results are returned.\n" |
| 9655 | "/// In general, for each dot product, the four corresponding elements of the\n" |
| 9656 | "/// input vectors are multiplied; the first two and second two products are\n" |
| 9657 | "/// summed, then the two sums are added to form the final result.\n" |
| 9658 | "///\n" |
| 9659 | "/// \\headerfile <x86intrin.h>\n" |
| 9660 | "///\n" |
| 9661 | "/// \\code\n" |
| 9662 | "/// __m256 _mm256_dp_ps(__m256 V1, __m256 V2, const int M);\n" |
| 9663 | "/// \\endcode\n" |
| 9664 | "///\n" |
| 9665 | "/// This intrinsic corresponds to the <c> VDPPS </c> instruction.\n" |
| 9666 | "///\n" |
| 9667 | "/// \\param V1\n" |
| 9668 | "/// A vector of [8 x float] values, treated as two [4 x float] vectors.\n" |
| 9669 | "/// \\param V2\n" |
| 9670 | "/// A vector of [8 x float] values, treated as two [4 x float] vectors.\n" |
| 9671 | "/// \\param M\n" |
| 9672 | "/// An immediate integer argument. Bits [7:4] determine which elements of\n" |
| 9673 | "/// the input vectors are used, with bit [4] corresponding to the lowest\n" |
| 9674 | "/// element and bit [7] corresponding to the highest element of each [4 x\n" |
| 9675 | "/// float] subvector. If a bit is set, the corresponding elements from the\n" |
| 9676 | "/// two input vectors are used as an input for dot product; otherwise that\n" |
| 9677 | "/// input is treated as zero. Bits [3:0] determine which elements of the\n" |
| 9678 | "/// result will receive a copy of the final dot product, with bit [0]\n" |
| 9679 | "/// corresponding to the lowest element and bit [3] corresponding to the\n" |
| 9680 | "/// highest element of each [4 x float] subvector. If a bit is set, the dot\n" |
| 9681 | "/// product is returned in the corresponding element; otherwise that element\n" |
| 9682 | "/// is set to zero. The bitmask is applied in the same way to each of the\n" |
| 9683 | "/// two parallel dot product computations.\n" |
| 9684 | "/// \\returns A 256-bit vector of [8 x float] containing the two dot products.\n" |
| 9685 | "#define _mm256_dp_ps(V1, V2, M) \\\n" |
| 9686 | " (__m256)__builtin_ia32_dpps256((__v8sf)(__m256)(V1), \\\n" |
| 9687 | " (__v8sf)(__m256)(V2), (M))\n" |
| 9688 | "\n" |
| 9689 | "/* Vector shuffle */\n" |
| 9690 | "/// Selects 8 float values from the 256-bit operands of [8 x float], as\n" |
| 9691 | "/// specified by the immediate value operand.\n" |
| 9692 | "///\n" |
| 9693 | "/// The four selected elements in each operand are copied to the destination\n" |
| 9694 | "/// according to the bits specified in the immediate operand. The selected\n" |
| 9695 | "/// elements from the first 256-bit operand are copied to bits [63:0] and\n" |
| 9696 | "/// bits [191:128] of the destination, and the selected elements from the\n" |
| 9697 | "/// second 256-bit operand are copied to bits [127:64] and bits [255:192] of\n" |
| 9698 | "/// the destination. For example, if bits [7:0] of the immediate operand\n" |
| 9699 | "/// contain a value of 0xFF, the 256-bit destination vector would contain the\n" |
| 9700 | "/// following values: b[7], b[7], a[7], a[7], b[3], b[3], a[3], a[3].\n" |
| 9701 | "///\n" |
| 9702 | "/// \\headerfile <x86intrin.h>\n" |
| 9703 | "///\n" |
| 9704 | "/// \\code\n" |
| 9705 | "/// __m256 _mm256_shuffle_ps(__m256 a, __m256 b, const int mask);\n" |
| 9706 | "/// \\endcode\n" |
| 9707 | "///\n" |
| 9708 | "/// This intrinsic corresponds to the <c> VSHUFPS </c> instruction.\n" |
| 9709 | "///\n" |
| 9710 | "/// \\param a\n" |
| 9711 | "/// A 256-bit vector of [8 x float]. The four selected elements in this\n" |
| 9712 | "/// operand are copied to bits [63:0] and bits [191:128] in the destination,\n" |
| 9713 | "/// according to the bits specified in the immediate operand.\n" |
| 9714 | "/// \\param b\n" |
| 9715 | "/// A 256-bit vector of [8 x float]. The four selected elements in this\n" |
| 9716 | "/// operand are copied to bits [127:64] and bits [255:192] in the\n" |
| 9717 | "/// destination, according to the bits specified in the immediate operand.\n" |
| 9718 | "/// \\param mask\n" |
| 9719 | "/// An immediate value containing an 8-bit value specifying which elements to\n" |
| 9720 | "/// copy from \\a a and \\a b \\n.\n" |
| 9721 | "/// Bits [3:0] specify the values copied from operand \\a a. \\n\n" |
| 9722 | "/// Bits [7:4] specify the values copied from operand \\a b. \\n\n" |
| 9723 | "/// The destinations within the 256-bit destination are assigned values as\n" |
| 9724 | "/// follows, according to the bit value assignments described below: \\n\n" |
| 9725 | "/// Bits [1:0] are used to assign values to bits [31:0] and [159:128] in the\n" |
| 9726 | "/// destination. \\n\n" |
| 9727 | "/// Bits [3:2] are used to assign values to bits [63:32] and [191:160] in the\n" |
| 9728 | "/// destination. \\n\n" |
| 9729 | "/// Bits [5:4] are used to assign values to bits [95:64] and [223:192] in the\n" |
| 9730 | "/// destination. \\n\n" |
| 9731 | "/// Bits [7:6] are used to assign values to bits [127:96] and [255:224] in\n" |
| 9732 | "/// the destination. \\n\n" |
| 9733 | "/// Bit value assignments: \\n\n" |
| 9734 | "/// 00: Bits [31:0] and [159:128] are copied from the selected operand. \\n\n" |
| 9735 | "/// 01: Bits [63:32] and [191:160] are copied from the selected operand. \\n\n" |
| 9736 | "/// 10: Bits [95:64] and [223:192] are copied from the selected operand. \\n\n" |
| 9737 | "/// 11: Bits [127:96] and [255:224] are copied from the selected operand.\n" |
| 9738 | "/// \\returns A 256-bit vector of [8 x float] containing the shuffled values.\n" |
| 9739 | "#define _mm256_shuffle_ps(a, b, mask) \\\n" |
| 9740 | " (__m256)__builtin_ia32_shufps256((__v8sf)(__m256)(a), \\\n" |
| 9741 | " (__v8sf)(__m256)(b), (int)(mask))\n" |
| 9742 | "\n" |
| 9743 | "/// Selects four double-precision values from the 256-bit operands of\n" |
| 9744 | "/// [4 x double], as specified by the immediate value operand.\n" |
| 9745 | "///\n" |
| 9746 | "/// The selected elements from the first 256-bit operand are copied to bits\n" |
| 9747 | "/// [63:0] and bits [191:128] in the destination, and the selected elements\n" |
| 9748 | "/// from the second 256-bit operand are copied to bits [127:64] and bits\n" |
| 9749 | "/// [255:192] in the destination. For example, if bits [3:0] of the immediate\n" |
| 9750 | "/// operand contain a value of 0xF, the 256-bit destination vector would\n" |
| 9751 | "/// contain the following values: b[3], a[3], b[1], a[1].\n" |
| 9752 | "///\n" |
| 9753 | "/// \\headerfile <x86intrin.h>\n" |
| 9754 | "///\n" |
| 9755 | "/// \\code\n" |
| 9756 | "/// __m256d _mm256_shuffle_pd(__m256d a, __m256d b, const int mask);\n" |
| 9757 | "/// \\endcode\n" |
| 9758 | "///\n" |
| 9759 | "/// This intrinsic corresponds to the <c> VSHUFPD </c> instruction.\n" |
| 9760 | "///\n" |
| 9761 | "/// \\param a\n" |
| 9762 | "/// A 256-bit vector of [4 x double].\n" |
| 9763 | "/// \\param b\n" |
| 9764 | "/// A 256-bit vector of [4 x double].\n" |
| 9765 | "/// \\param mask\n" |
| 9766 | "/// An immediate value containing 8-bit values specifying which elements to\n" |
| 9767 | "/// copy from \\a a and \\a b: \\n\n" |
| 9768 | "/// Bit [0]=0: Bits [63:0] are copied from \\a a to bits [63:0] of the\n" |
| 9769 | "/// destination. \\n\n" |
| 9770 | "/// Bit [0]=1: Bits [127:64] are copied from \\a a to bits [63:0] of the\n" |
| 9771 | "/// destination. \\n\n" |
| 9772 | "/// Bit [1]=0: Bits [63:0] are copied from \\a b to bits [127:64] of the\n" |
| 9773 | "/// destination. \\n\n" |
| 9774 | "/// Bit [1]=1: Bits [127:64] are copied from \\a b to bits [127:64] of the\n" |
| 9775 | "/// destination. \\n\n" |
| 9776 | "/// Bit [2]=0: Bits [191:128] are copied from \\a a to bits [191:128] of the\n" |
| 9777 | "/// destination. \\n\n" |
| 9778 | "/// Bit [2]=1: Bits [255:192] are copied from \\a a to bits [191:128] of the\n" |
| 9779 | "/// destination. \\n\n" |
| 9780 | "/// Bit [3]=0: Bits [191:128] are copied from \\a b to bits [255:192] of the\n" |
| 9781 | "/// destination. \\n\n" |
| 9782 | "/// Bit [3]=1: Bits [255:192] are copied from \\a b to bits [255:192] of the\n" |
| 9783 | "/// destination.\n" |
| 9784 | "/// \\returns A 256-bit vector of [4 x double] containing the shuffled values.\n" |
| 9785 | "#define _mm256_shuffle_pd(a, b, mask) \\\n" |
| 9786 | " (__m256d)__builtin_ia32_shufpd256((__v4df)(__m256d)(a), \\\n" |
| 9787 | " (__v4df)(__m256d)(b), (int)(mask))\n" |
| 9788 | "\n" |
| 9789 | "/* Compare */\n" |
| 9790 | "#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */\n" |
| 9791 | "#define _CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */\n" |
| 9792 | "#define _CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */\n" |
| 9793 | "#define _CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */\n" |
| 9794 | "#define _CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */\n" |
| 9795 | "#define _CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */\n" |
| 9796 | "#define _CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */\n" |
| 9797 | "#define _CMP_ORD_Q 0x07 /* Ordered (non-signaling) */\n" |
| 9798 | "#define _CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */\n" |
| 9799 | "#define _CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unordered, signaling) */\n" |
| 9800 | "#define _CMP_NGT_US 0x0a /* Not-greater-than (unordered, signaling) */\n" |
| 9801 | "#define _CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling) */\n" |
| 9802 | "#define _CMP_NEQ_OQ 0x0c /* Not-equal (ordered, non-signaling) */\n" |
| 9803 | "#define _CMP_GE_OS 0x0d /* Greater-than-or-equal (ordered, signaling) */\n" |
| 9804 | "#define _CMP_GT_OS 0x0e /* Greater-than (ordered, signaling) */\n" |
| 9805 | "#define _CMP_TRUE_UQ 0x0f /* True (unordered, non-signaling) */\n" |
| 9806 | "#define _CMP_EQ_OS 0x10 /* Equal (ordered, signaling) */\n" |
| 9807 | "#define _CMP_LT_OQ 0x11 /* Less-than (ordered, non-signaling) */\n" |
| 9808 | "#define _CMP_LE_OQ 0x12 /* Less-than-or-equal (ordered, non-signaling) */\n" |
| 9809 | "#define _CMP_UNORD_S 0x13 /* Unordered (signaling) */\n" |
| 9810 | "#define _CMP_NEQ_US 0x14 /* Not-equal (unordered, signaling) */\n" |
| 9811 | "#define _CMP_NLT_UQ 0x15 /* Not-less-than (unordered, non-signaling) */\n" |
| 9812 | "#define _CMP_NLE_UQ 0x16 /* Not-less-than-or-equal (unordered, non-signaling) */\n" |
| 9813 | "#define _CMP_ORD_S 0x17 /* Ordered (signaling) */\n" |
| 9814 | "#define _CMP_EQ_US 0x18 /* Equal (unordered, signaling) */\n" |
| 9815 | "#define _CMP_NGE_UQ 0x19 /* Not-greater-than-or-equal (unordered, non-signaling) */\n" |
| 9816 | "#define _CMP_NGT_UQ 0x1a /* Not-greater-than (unordered, non-signaling) */\n" |
| 9817 | "#define _CMP_FALSE_OS 0x1b /* False (ordered, signaling) */\n" |
| 9818 | "#define _CMP_NEQ_OS 0x1c /* Not-equal (ordered, signaling) */\n" |
| 9819 | "#define _CMP_GE_OQ 0x1d /* Greater-than-or-equal (ordered, non-signaling) */\n" |
| 9820 | "#define _CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */\n" |
| 9821 | "#define _CMP_TRUE_US 0x1f /* True (unordered, signaling) */\n" |
| 9822 | "\n" |
| 9823 | "/// Compares each of the corresponding double-precision values of two\n" |
| 9824 | "/// 128-bit vectors of [2 x double], using the operation specified by the\n" |
| 9825 | "/// immediate integer operand.\n" |
| 9826 | "///\n" |
| 9827 | "/// Returns a [2 x double] vector consisting of two doubles corresponding to\n" |
| 9828 | "/// the two comparison results: zero if the comparison is false, and all 1's\n" |
| 9829 | "/// if the comparison is true.\n" |
| 9830 | "///\n" |
| 9831 | "/// \\headerfile <x86intrin.h>\n" |
| 9832 | "///\n" |
| 9833 | "/// \\code\n" |
| 9834 | "/// __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c);\n" |
| 9835 | "/// \\endcode\n" |
| 9836 | "///\n" |
| 9837 | "/// This intrinsic corresponds to the <c> VCMPPD </c> instruction.\n" |
| 9838 | "///\n" |
| 9839 | "/// \\param a\n" |
| 9840 | "/// A 128-bit vector of [2 x double].\n" |
| 9841 | "/// \\param b\n" |
| 9842 | "/// A 128-bit vector of [2 x double].\n" |
| 9843 | "/// \\param c\n" |
| 9844 | "/// An immediate integer operand, with bits [4:0] specifying which comparison\n" |
| 9845 | "/// operation to use: \\n\n" |
| 9846 | "/// 0x00: Equal (ordered, non-signaling) \\n\n" |
| 9847 | "/// 0x01: Less-than (ordered, signaling) \\n\n" |
| 9848 | "/// 0x02: Less-than-or-equal (ordered, signaling) \\n\n" |
| 9849 | "/// 0x03: Unordered (non-signaling) \\n\n" |
| 9850 | "/// 0x04: Not-equal (unordered, non-signaling) \\n\n" |
| 9851 | "/// 0x05: Not-less-than (unordered, signaling) \\n\n" |
| 9852 | "/// 0x06: Not-less-than-or-equal (unordered, signaling) \\n\n" |
| 9853 | "/// 0x07: Ordered (non-signaling) \\n\n" |
| 9854 | "/// 0x08: Equal (unordered, non-signaling) \\n\n" |
| 9855 | "/// 0x09: Not-greater-than-or-equal (unordered, signaling) \\n\n" |
| 9856 | "/// 0x0A: Not-greater-than (unordered, signaling) \\n\n" |
| 9857 | "/// 0x0B: False (ordered, non-signaling) \\n\n" |
| 9858 | "/// 0x0C: Not-equal (ordered, non-signaling) \\n\n" |
| 9859 | "/// 0x0D: Greater-than-or-equal (ordered, signaling) \\n\n" |
| 9860 | "/// 0x0E: Greater-than (ordered, signaling) \\n\n" |
| 9861 | "/// 0x0F: True (unordered, non-signaling) \\n\n" |
| 9862 | "/// 0x10: Equal (ordered, signaling) \\n\n" |
| 9863 | "/// 0x11: Less-than (ordered, non-signaling) \\n\n" |
| 9864 | "/// 0x12: Less-than-or-equal (ordered, non-signaling) \\n\n" |
| 9865 | "/// 0x13: Unordered (signaling) \\n\n" |
| 9866 | "/// 0x14: Not-equal (unordered, signaling) \\n\n" |
| 9867 | "/// 0x15: Not-less-than (unordered, non-signaling) \\n\n" |
| 9868 | "/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \\n\n" |
| 9869 | "/// 0x17: Ordered (signaling) \\n\n" |
| 9870 | "/// 0x18: Equal (unordered, signaling) \\n\n" |
| 9871 | "/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \\n\n" |
| 9872 | "/// 0x1A: Not-greater-than (unordered, non-signaling) \\n\n" |
| 9873 | "/// 0x1B: False (ordered, signaling) \\n\n" |
| 9874 | "/// 0x1C: Not-equal (ordered, signaling) \\n\n" |
| 9875 | "/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \\n\n" |
| 9876 | "/// 0x1E: Greater-than (ordered, non-signaling) \\n\n" |
| 9877 | "/// 0x1F: True (unordered, signaling)\n" |
| 9878 | "/// \\returns A 128-bit vector of [2 x double] containing the comparison results.\n" |
| 9879 | "#define _mm_cmp_pd(a, b, c) \\\n" |
| 9880 | " (__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \\\n" |
| 9881 | " (__v2df)(__m128d)(b), (c))\n" |
| 9882 | "\n" |
| 9883 | "/// Compares each of the corresponding values of two 128-bit vectors of\n" |
| 9884 | "/// [4 x float], using the operation specified by the immediate integer\n" |
| 9885 | "/// operand.\n" |
| 9886 | "///\n" |
| 9887 | "/// Returns a [4 x float] vector consisting of four floats corresponding to\n" |
| 9888 | "/// the four comparison results: zero if the comparison is false, and all 1's\n" |
| 9889 | "/// if the comparison is true.\n" |
| 9890 | "///\n" |
| 9891 | "/// \\headerfile <x86intrin.h>\n" |
| 9892 | "///\n" |
| 9893 | "/// \\code\n" |
| 9894 | "/// __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c);\n" |
| 9895 | "/// \\endcode\n" |
| 9896 | "///\n" |
| 9897 | "/// This intrinsic corresponds to the <c> VCMPPS </c> instruction.\n" |
| 9898 | "///\n" |
| 9899 | "/// \\param a\n" |
| 9900 | "/// A 128-bit vector of [4 x float].\n" |
| 9901 | "/// \\param b\n" |
| 9902 | "/// A 128-bit vector of [4 x float].\n" |
| 9903 | "/// \\param c\n" |
| 9904 | "/// An immediate integer operand, with bits [4:0] specifying which comparison\n" |
| 9905 | "/// operation to use: \\n\n" |
| 9906 | "/// 0x00: Equal (ordered, non-signaling) \\n\n" |
| 9907 | "/// 0x01: Less-than (ordered, signaling) \\n\n" |
| 9908 | "/// 0x02: Less-than-or-equal (ordered, signaling) \\n\n" |
| 9909 | "/// 0x03: Unordered (non-signaling) \\n\n" |
| 9910 | "/// 0x04: Not-equal (unordered, non-signaling) \\n\n" |
| 9911 | "/// 0x05: Not-less-than (unordered, signaling) \\n\n" |
| 9912 | "/// 0x06: Not-less-than-or-equal (unordered, signaling) \\n\n" |
| 9913 | "/// 0x07: Ordered (non-signaling) \\n\n" |
| 9914 | "/// 0x08: Equal (unordered, non-signaling) \\n\n" |
| 9915 | "/// 0x09: Not-greater-than-or-equal (unordered, signaling) \\n\n" |
| 9916 | "/// 0x0A: Not-greater-than (unordered, signaling) \\n\n" |
| 9917 | "/// 0x0B: False (ordered, non-signaling) \\n\n" |
| 9918 | "/// 0x0C: Not-equal (ordered, non-signaling) \\n\n" |
| 9919 | "/// 0x0D: Greater-than-or-equal (ordered, signaling) \\n\n" |
| 9920 | "/// 0x0E: Greater-than (ordered, signaling) \\n\n" |
| 9921 | "/// 0x0F: True (unordered, non-signaling) \\n\n" |
| 9922 | "/// 0x10: Equal (ordered, signaling) \\n\n" |
| 9923 | "/// 0x11: Less-than (ordered, non-signaling) \\n\n" |
| 9924 | "/// 0x12: Less-than-or-equal (ordered, non-signaling) \\n\n" |
| 9925 | "/// 0x13: Unordered (signaling) \\n\n" |
| 9926 | "/// 0x14: Not-equal (unordered, signaling) \\n\n" |
| 9927 | "/// 0x15: Not-less-than (unordered, non-signaling) \\n\n" |
| 9928 | "/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \\n\n" |
| 9929 | "/// 0x17: Ordered (signaling) \\n\n" |
| 9930 | "/// 0x18: Equal (unordered, signaling) \\n\n" |
| 9931 | "/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \\n\n" |
| 9932 | "/// 0x1A: Not-greater-than (unordered, non-signaling) \\n\n" |
| 9933 | "/// 0x1B: False (ordered, signaling) \\n\n" |
| 9934 | "/// 0x1C: Not-equal (ordered, signaling) \\n\n" |
| 9935 | "/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \\n\n" |
| 9936 | "/// 0x1E: Greater-than (ordered, non-signaling) \\n\n" |
| 9937 | "/// 0x1F: True (unordered, signaling)\n" |
| 9938 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n" |
| 9939 | "#define _mm_cmp_ps(a, b, c) \\\n" |
| 9940 | " (__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \\\n" |
| 9941 | " (__v4sf)(__m128)(b), (c))\n" |
| 9942 | "\n" |
| 9943 | "/// Compares each of the corresponding double-precision values of two\n" |
| 9944 | "/// 256-bit vectors of [4 x double], using the operation specified by the\n" |
| 9945 | "/// immediate integer operand.\n" |
| 9946 | "///\n" |
| 9947 | "/// Returns a [4 x double] vector consisting of four doubles corresponding to\n" |
| 9948 | "/// the four comparison results: zero if the comparison is false, and all 1's\n" |
| 9949 | "/// if the comparison is true.\n" |
| 9950 | "///\n" |
| 9951 | "/// \\headerfile <x86intrin.h>\n" |
| 9952 | "///\n" |
| 9953 | "/// \\code\n" |
| 9954 | "/// __m256d _mm256_cmp_pd(__m256d a, __m256d b, const int c);\n" |
| 9955 | "/// \\endcode\n" |
| 9956 | "///\n" |
| 9957 | "/// This intrinsic corresponds to the <c> VCMPPD </c> instruction.\n" |
| 9958 | "///\n" |
| 9959 | "/// \\param a\n" |
| 9960 | "/// A 256-bit vector of [4 x double].\n" |
| 9961 | "/// \\param b\n" |
| 9962 | "/// A 256-bit vector of [4 x double].\n" |
| 9963 | "/// \\param c\n" |
| 9964 | "/// An immediate integer operand, with bits [4:0] specifying which comparison\n" |
| 9965 | "/// operation to use: \\n\n" |
| 9966 | "/// 0x00: Equal (ordered, non-signaling) \\n\n" |
| 9967 | "/// 0x01: Less-than (ordered, signaling) \\n\n" |
| 9968 | "/// 0x02: Less-than-or-equal (ordered, signaling) \\n\n" |
| 9969 | "/// 0x03: Unordered (non-signaling) \\n\n" |
| 9970 | "/// 0x04: Not-equal (unordered, non-signaling) \\n\n" |
| 9971 | "/// 0x05: Not-less-than (unordered, signaling) \\n\n" |
| 9972 | "/// 0x06: Not-less-than-or-equal (unordered, signaling) \\n\n" |
| 9973 | "/// 0x07: Ordered (non-signaling) \\n\n" |
| 9974 | "/// 0x08: Equal (unordered, non-signaling) \\n\n" |
| 9975 | "/// 0x09: Not-greater-than-or-equal (unordered, signaling) \\n\n" |
| 9976 | "/// 0x0A: Not-greater-than (unordered, signaling) \\n\n" |
| 9977 | "/// 0x0B: False (ordered, non-signaling) \\n\n" |
| 9978 | "/// 0x0C: Not-equal (ordered, non-signaling) \\n\n" |
| 9979 | "/// 0x0D: Greater-than-or-equal (ordered, signaling) \\n\n" |
| 9980 | "/// 0x0E: Greater-than (ordered, signaling) \\n\n" |
| 9981 | "/// 0x0F: True (unordered, non-signaling) \\n\n" |
| 9982 | "/// 0x10: Equal (ordered, signaling) \\n\n" |
| 9983 | "/// 0x11: Less-than (ordered, non-signaling) \\n\n" |
| 9984 | "/// 0x12: Less-than-or-equal (ordered, non-signaling) \\n\n" |
| 9985 | "/// 0x13: Unordered (signaling) \\n\n" |
| 9986 | "/// 0x14: Not-equal (unordered, signaling) \\n\n" |
| 9987 | "/// 0x15: Not-less-than (unordered, non-signaling) \\n\n" |
| 9988 | "/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \\n\n" |
| 9989 | "/// 0x17: Ordered (signaling) \\n\n" |
| 9990 | "/// 0x18: Equal (unordered, signaling) \\n\n" |
| 9991 | "/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \\n\n" |
| 9992 | "/// 0x1A: Not-greater-than (unordered, non-signaling) \\n\n" |
| 9993 | "/// 0x1B: False (ordered, signaling) \\n\n" |
| 9994 | "/// 0x1C: Not-equal (ordered, signaling) \\n\n" |
| 9995 | "/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \\n\n" |
| 9996 | "/// 0x1E: Greater-than (ordered, non-signaling) \\n\n" |
| 9997 | "/// 0x1F: True (unordered, signaling)\n" |
| 9998 | "/// \\returns A 256-bit vector of [4 x double] containing the comparison results.\n" |
| 9999 | "#define _mm256_cmp_pd(a, b, c) \\\n" |
| 10000 | " (__m256d)__builtin_ia32_cmppd256((__v4df)(__m256d)(a), \\\n" |
| 10001 | " (__v4df)(__m256d)(b), (c))\n" |
| 10002 | "\n" |
| 10003 | "/// Compares each of the corresponding values of two 256-bit vectors of\n" |
| 10004 | "/// [8 x float], using the operation specified by the immediate integer\n" |
| 10005 | "/// operand.\n" |
| 10006 | "///\n" |
| 10007 | "/// Returns a [8 x float] vector consisting of eight floats corresponding to\n" |
| 10008 | "/// the eight comparison results: zero if the comparison is false, and all\n" |
| 10009 | "/// 1's if the comparison is true.\n" |
| 10010 | "///\n" |
| 10011 | "/// \\headerfile <x86intrin.h>\n" |
| 10012 | "///\n" |
| 10013 | "/// \\code\n" |
| 10014 | "/// __m256 _mm256_cmp_ps(__m256 a, __m256 b, const int c);\n" |
| 10015 | "/// \\endcode\n" |
| 10016 | "///\n" |
| 10017 | "/// This intrinsic corresponds to the <c> VCMPPS </c> instruction.\n" |
| 10018 | "///\n" |
| 10019 | "/// \\param a\n" |
| 10020 | "/// A 256-bit vector of [8 x float].\n" |
| 10021 | "/// \\param b\n" |
| 10022 | "/// A 256-bit vector of [8 x float].\n" |
| 10023 | "/// \\param c\n" |
| 10024 | "/// An immediate integer operand, with bits [4:0] specifying which comparison\n" |
| 10025 | "/// operation to use: \\n\n" |
| 10026 | "/// 0x00: Equal (ordered, non-signaling) \\n\n" |
| 10027 | "/// 0x01: Less-than (ordered, signaling) \\n\n" |
| 10028 | "/// 0x02: Less-than-or-equal (ordered, signaling) \\n\n" |
| 10029 | "/// 0x03: Unordered (non-signaling) \\n\n" |
| 10030 | "/// 0x04: Not-equal (unordered, non-signaling) \\n\n" |
| 10031 | "/// 0x05: Not-less-than (unordered, signaling) \\n\n" |
| 10032 | "/// 0x06: Not-less-than-or-equal (unordered, signaling) \\n\n" |
| 10033 | "/// 0x07: Ordered (non-signaling) \\n\n" |
| 10034 | "/// 0x08: Equal (unordered, non-signaling) \\n\n" |
| 10035 | "/// 0x09: Not-greater-than-or-equal (unordered, signaling) \\n\n" |
| 10036 | "/// 0x0A: Not-greater-than (unordered, signaling) \\n\n" |
| 10037 | "/// 0x0B: False (ordered, non-signaling) \\n\n" |
| 10038 | "/// 0x0C: Not-equal (ordered, non-signaling) \\n\n" |
| 10039 | "/// 0x0D: Greater-than-or-equal (ordered, signaling) \\n\n" |
| 10040 | "/// 0x0E: Greater-than (ordered, signaling) \\n\n" |
| 10041 | "/// 0x0F: True (unordered, non-signaling) \\n\n" |
| 10042 | "/// 0x10: Equal (ordered, signaling) \\n\n" |
| 10043 | "/// 0x11: Less-than (ordered, non-signaling) \\n\n" |
| 10044 | "/// 0x12: Less-than-or-equal (ordered, non-signaling) \\n\n" |
| 10045 | "/// 0x13: Unordered (signaling) \\n\n" |
| 10046 | "/// 0x14: Not-equal (unordered, signaling) \\n\n" |
| 10047 | "/// 0x15: Not-less-than (unordered, non-signaling) \\n\n" |
| 10048 | "/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \\n\n" |
| 10049 | "/// 0x17: Ordered (signaling) \\n\n" |
| 10050 | "/// 0x18: Equal (unordered, signaling) \\n\n" |
| 10051 | "/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \\n\n" |
| 10052 | "/// 0x1A: Not-greater-than (unordered, non-signaling) \\n\n" |
| 10053 | "/// 0x1B: False (ordered, signaling) \\n\n" |
| 10054 | "/// 0x1C: Not-equal (ordered, signaling) \\n\n" |
| 10055 | "/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \\n\n" |
| 10056 | "/// 0x1E: Greater-than (ordered, non-signaling) \\n\n" |
| 10057 | "/// 0x1F: True (unordered, signaling)\n" |
| 10058 | "/// \\returns A 256-bit vector of [8 x float] containing the comparison results.\n" |
| 10059 | "#define _mm256_cmp_ps(a, b, c) \\\n" |
| 10060 | " (__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \\\n" |
| 10061 | " (__v8sf)(__m256)(b), (c))\n" |
| 10062 | "\n" |
| 10063 | "/// Compares each of the corresponding scalar double-precision values of\n" |
| 10064 | "/// two 128-bit vectors of [2 x double], using the operation specified by the\n" |
| 10065 | "/// immediate integer operand.\n" |
| 10066 | "///\n" |
| 10067 | "/// If the result is true, all 64 bits of the destination vector are set;\n" |
| 10068 | "/// otherwise they are cleared.\n" |
| 10069 | "///\n" |
| 10070 | "/// \\headerfile <x86intrin.h>\n" |
| 10071 | "///\n" |
| 10072 | "/// \\code\n" |
| 10073 | "/// __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c);\n" |
| 10074 | "/// \\endcode\n" |
| 10075 | "///\n" |
| 10076 | "/// This intrinsic corresponds to the <c> VCMPSD </c> instruction.\n" |
| 10077 | "///\n" |
| 10078 | "/// \\param a\n" |
| 10079 | "/// A 128-bit vector of [2 x double].\n" |
| 10080 | "/// \\param b\n" |
| 10081 | "/// A 128-bit vector of [2 x double].\n" |
| 10082 | "/// \\param c\n" |
| 10083 | "/// An immediate integer operand, with bits [4:0] specifying which comparison\n" |
| 10084 | "/// operation to use: \\n\n" |
| 10085 | "/// 0x00: Equal (ordered, non-signaling) \\n\n" |
| 10086 | "/// 0x01: Less-than (ordered, signaling) \\n\n" |
| 10087 | "/// 0x02: Less-than-or-equal (ordered, signaling) \\n\n" |
| 10088 | "/// 0x03: Unordered (non-signaling) \\n\n" |
| 10089 | "/// 0x04: Not-equal (unordered, non-signaling) \\n\n" |
| 10090 | "/// 0x05: Not-less-than (unordered, signaling) \\n\n" |
| 10091 | "/// 0x06: Not-less-than-or-equal (unordered, signaling) \\n\n" |
| 10092 | "/// 0x07: Ordered (non-signaling) \\n\n" |
| 10093 | "/// 0x08: Equal (unordered, non-signaling) \\n\n" |
| 10094 | "/// 0x09: Not-greater-than-or-equal (unordered, signaling) \\n\n" |
| 10095 | "/// 0x0A: Not-greater-than (unordered, signaling) \\n\n" |
| 10096 | "/// 0x0B: False (ordered, non-signaling) \\n\n" |
| 10097 | "/// 0x0C: Not-equal (ordered, non-signaling) \\n\n" |
| 10098 | "/// 0x0D: Greater-than-or-equal (ordered, signaling) \\n\n" |
| 10099 | "/// 0x0E: Greater-than (ordered, signaling) \\n\n" |
| 10100 | "/// 0x0F: True (unordered, non-signaling) \\n\n" |
| 10101 | "/// 0x10: Equal (ordered, signaling) \\n\n" |
| 10102 | "/// 0x11: Less-than (ordered, non-signaling) \\n\n" |
| 10103 | "/// 0x12: Less-than-or-equal (ordered, non-signaling) \\n\n" |
| 10104 | "/// 0x13: Unordered (signaling) \\n\n" |
| 10105 | "/// 0x14: Not-equal (unordered, signaling) \\n\n" |
| 10106 | "/// 0x15: Not-less-than (unordered, non-signaling) \\n\n" |
| 10107 | "/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \\n\n" |
| 10108 | "/// 0x17: Ordered (signaling) \\n\n" |
| 10109 | "/// 0x18: Equal (unordered, signaling) \\n\n" |
| 10110 | "/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \\n\n" |
| 10111 | "/// 0x1A: Not-greater-than (unordered, non-signaling) \\n\n" |
| 10112 | "/// 0x1B: False (ordered, signaling) \\n\n" |
| 10113 | "/// 0x1C: Not-equal (ordered, signaling) \\n\n" |
| 10114 | "/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \\n\n" |
| 10115 | "/// 0x1E: Greater-than (ordered, non-signaling) \\n\n" |
| 10116 | "/// 0x1F: True (unordered, signaling)\n" |
| 10117 | "/// \\returns A 128-bit vector of [2 x double] containing the comparison results.\n" |
| 10118 | "#define _mm_cmp_sd(a, b, c) \\\n" |
| 10119 | " (__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \\\n" |
| 10120 | " (__v2df)(__m128d)(b), (c))\n" |
| 10121 | "\n" |
| 10122 | "/// Compares each of the corresponding scalar values of two 128-bit\n" |
| 10123 | "/// vectors of [4 x float], using the operation specified by the immediate\n" |
| 10124 | "/// integer operand.\n" |
| 10125 | "///\n" |
| 10126 | "/// If the result is true, all 32 bits of the destination vector are set;\n" |
| 10127 | "/// otherwise they are cleared.\n" |
| 10128 | "///\n" |
| 10129 | "/// \\headerfile <x86intrin.h>\n" |
| 10130 | "///\n" |
| 10131 | "/// \\code\n" |
| 10132 | "/// __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c);\n" |
| 10133 | "/// \\endcode\n" |
| 10134 | "///\n" |
| 10135 | "/// This intrinsic corresponds to the <c> VCMPSS </c> instruction.\n" |
| 10136 | "///\n" |
| 10137 | "/// \\param a\n" |
| 10138 | "/// A 128-bit vector of [4 x float].\n" |
| 10139 | "/// \\param b\n" |
| 10140 | "/// A 128-bit vector of [4 x float].\n" |
| 10141 | "/// \\param c\n" |
| 10142 | "/// An immediate integer operand, with bits [4:0] specifying which comparison\n" |
| 10143 | "/// operation to use: \\n\n" |
| 10144 | "/// 0x00: Equal (ordered, non-signaling) \\n\n" |
| 10145 | "/// 0x01: Less-than (ordered, signaling) \\n\n" |
| 10146 | "/// 0x02: Less-than-or-equal (ordered, signaling) \\n\n" |
| 10147 | "/// 0x03: Unordered (non-signaling) \\n\n" |
| 10148 | "/// 0x04: Not-equal (unordered, non-signaling) \\n\n" |
| 10149 | "/// 0x05: Not-less-than (unordered, signaling) \\n\n" |
| 10150 | "/// 0x06: Not-less-than-or-equal (unordered, signaling) \\n\n" |
| 10151 | "/// 0x07: Ordered (non-signaling) \\n\n" |
| 10152 | "/// 0x08: Equal (unordered, non-signaling) \\n\n" |
| 10153 | "/// 0x09: Not-greater-than-or-equal (unordered, signaling) \\n\n" |
| 10154 | "/// 0x0A: Not-greater-than (unordered, signaling) \\n\n" |
| 10155 | "/// 0x0B: False (ordered, non-signaling) \\n\n" |
| 10156 | "/// 0x0C: Not-equal (ordered, non-signaling) \\n\n" |
| 10157 | "/// 0x0D: Greater-than-or-equal (ordered, signaling) \\n\n" |
| 10158 | "/// 0x0E: Greater-than (ordered, signaling) \\n\n" |
| 10159 | "/// 0x0F: True (unordered, non-signaling) \\n\n" |
| 10160 | "/// 0x10: Equal (ordered, signaling) \\n\n" |
| 10161 | "/// 0x11: Less-than (ordered, non-signaling) \\n\n" |
| 10162 | "/// 0x12: Less-than-or-equal (ordered, non-signaling) \\n\n" |
| 10163 | "/// 0x13: Unordered (signaling) \\n\n" |
| 10164 | "/// 0x14: Not-equal (unordered, signaling) \\n\n" |
| 10165 | "/// 0x15: Not-less-than (unordered, non-signaling) \\n\n" |
| 10166 | "/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \\n\n" |
| 10167 | "/// 0x17: Ordered (signaling) \\n\n" |
| 10168 | "/// 0x18: Equal (unordered, signaling) \\n\n" |
| 10169 | "/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \\n\n" |
| 10170 | "/// 0x1A: Not-greater-than (unordered, non-signaling) \\n\n" |
| 10171 | "/// 0x1B: False (ordered, signaling) \\n\n" |
| 10172 | "/// 0x1C: Not-equal (ordered, signaling) \\n\n" |
| 10173 | "/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \\n\n" |
| 10174 | "/// 0x1E: Greater-than (ordered, non-signaling) \\n\n" |
| 10175 | "/// 0x1F: True (unordered, signaling)\n" |
| 10176 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n" |
| 10177 | "#define _mm_cmp_ss(a, b, c) \\\n" |
| 10178 | " (__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \\\n" |
| 10179 | " (__v4sf)(__m128)(b), (c))\n" |
| 10180 | "\n" |
| 10181 | "/// Takes a [8 x i32] vector and returns the vector element value\n" |
| 10182 | "/// indexed by the immediate constant operand.\n" |
| 10183 | "///\n" |
| 10184 | "/// \\headerfile <x86intrin.h>\n" |
| 10185 | "///\n" |
| 10186 | "/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>\n" |
| 10187 | "/// instruction.\n" |
| 10188 | "///\n" |
| 10189 | "/// \\param __a\n" |
| 10190 | "/// A 256-bit vector of [8 x i32].\n" |
| 10191 | "/// \\param __imm\n" |
| 10192 | "/// An immediate integer operand with bits [2:0] determining which vector\n" |
| 10193 | "/// element is extracted and returned.\n" |
| 10194 | "/// \\returns A 32-bit integer containing the extracted 32 bits of extended\n" |
| 10195 | "/// packed data.\n" |
| 10196 | "#define _mm256_extract_epi32(X, N) \\\n" |
| 10197 | " (int)__builtin_ia32_vec_ext_v8si((__v8si)(__m256i)(X), (int)(N))\n" |
| 10198 | "\n" |
| 10199 | "/// Takes a [16 x i16] vector and returns the vector element value\n" |
| 10200 | "/// indexed by the immediate constant operand.\n" |
| 10201 | "///\n" |
| 10202 | "/// \\headerfile <x86intrin.h>\n" |
| 10203 | "///\n" |
| 10204 | "/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>\n" |
| 10205 | "/// instruction.\n" |
| 10206 | "///\n" |
| 10207 | "/// \\param __a\n" |
| 10208 | "/// A 256-bit integer vector of [16 x i16].\n" |
| 10209 | "/// \\param __imm\n" |
| 10210 | "/// An immediate integer operand with bits [3:0] determining which vector\n" |
| 10211 | "/// element is extracted and returned.\n" |
| 10212 | "/// \\returns A 32-bit integer containing the extracted 16 bits of zero extended\n" |
| 10213 | "/// packed data.\n" |
| 10214 | "#define _mm256_extract_epi16(X, N) \\\n" |
| 10215 | " (int)(unsigned short)__builtin_ia32_vec_ext_v16hi((__v16hi)(__m256i)(X), \\\n" |
| 10216 | " (int)(N))\n" |
| 10217 | "\n" |
| 10218 | "/// Takes a [32 x i8] vector and returns the vector element value\n" |
| 10219 | "/// indexed by the immediate constant operand.\n" |
| 10220 | "///\n" |
| 10221 | "/// \\headerfile <x86intrin.h>\n" |
| 10222 | "///\n" |
| 10223 | "/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>\n" |
| 10224 | "/// instruction.\n" |
| 10225 | "///\n" |
| 10226 | "/// \\param __a\n" |
| 10227 | "/// A 256-bit integer vector of [32 x i8].\n" |
| 10228 | "/// \\param __imm\n" |
| 10229 | "/// An immediate integer operand with bits [4:0] determining which vector\n" |
| 10230 | "/// element is extracted and returned.\n" |
| 10231 | "/// \\returns A 32-bit integer containing the extracted 8 bits of zero extended\n" |
| 10232 | "/// packed data.\n" |
| 10233 | "#define _mm256_extract_epi8(X, N) \\\n" |
| 10234 | " (int)(unsigned char)__builtin_ia32_vec_ext_v32qi((__v32qi)(__m256i)(X), \\\n" |
| 10235 | " (int)(N))\n" |
| 10236 | "\n" |
| 10237 | "#ifdef __x86_64__\n" |
| 10238 | "/// Takes a [4 x i64] vector and returns the vector element value\n" |
| 10239 | "/// indexed by the immediate constant operand.\n" |
| 10240 | "///\n" |
| 10241 | "/// \\headerfile <x86intrin.h>\n" |
| 10242 | "///\n" |
| 10243 | "/// This intrinsic corresponds to the <c> VEXTRACTF128+COMPOSITE </c>\n" |
| 10244 | "/// instruction.\n" |
| 10245 | "///\n" |
| 10246 | "/// \\param __a\n" |
| 10247 | "/// A 256-bit integer vector of [4 x i64].\n" |
| 10248 | "/// \\param __imm\n" |
| 10249 | "/// An immediate integer operand with bits [1:0] determining which vector\n" |
| 10250 | "/// element is extracted and returned.\n" |
| 10251 | "/// \\returns A 64-bit integer containing the extracted 64 bits of extended\n" |
| 10252 | "/// packed data.\n" |
| 10253 | "#define _mm256_extract_epi64(X, N) \\\n" |
| 10254 | " (long long)__builtin_ia32_vec_ext_v4di((__v4di)(__m256i)(X), (int)(N))\n" |
| 10255 | "#endif\n" |
| 10256 | "\n" |
| 10257 | "/// Takes a [8 x i32] vector and replaces the vector element value\n" |
| 10258 | "/// indexed by the immediate constant operand by a new value. Returns the\n" |
| 10259 | "/// modified vector.\n" |
| 10260 | "///\n" |
| 10261 | "/// \\headerfile <x86intrin.h>\n" |
| 10262 | "///\n" |
| 10263 | "/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>\n" |
| 10264 | "/// instruction.\n" |
| 10265 | "///\n" |
| 10266 | "/// \\param __a\n" |
| 10267 | "/// A vector of [8 x i32] to be used by the insert operation.\n" |
| 10268 | "/// \\param __b\n" |
| 10269 | "/// An integer value. The replacement value for the insert operation.\n" |
| 10270 | "/// \\param __imm\n" |
| 10271 | "/// An immediate integer specifying the index of the vector element to be\n" |
| 10272 | "/// replaced.\n" |
| 10273 | "/// \\returns A copy of vector \\a __a, after replacing its element indexed by\n" |
| 10274 | "/// \\a __imm with \\a __b.\n" |
| 10275 | "#define _mm256_insert_epi32(X, I, N) \\\n" |
| 10276 | " (__m256i)__builtin_ia32_vec_set_v8si((__v8si)(__m256i)(X), \\\n" |
| 10277 | " (int)(I), (int)(N))\n" |
| 10278 | "\n" |
| 10279 | "\n" |
| 10280 | "/// Takes a [16 x i16] vector and replaces the vector element value\n" |
| 10281 | "/// indexed by the immediate constant operand with a new value. Returns the\n" |
| 10282 | "/// modified vector.\n" |
| 10283 | "///\n" |
| 10284 | "/// \\headerfile <x86intrin.h>\n" |
| 10285 | "///\n" |
| 10286 | "/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>\n" |
| 10287 | "/// instruction.\n" |
| 10288 | "///\n" |
| 10289 | "/// \\param __a\n" |
| 10290 | "/// A vector of [16 x i16] to be used by the insert operation.\n" |
| 10291 | "/// \\param __b\n" |
| 10292 | "/// An i16 integer value. The replacement value for the insert operation.\n" |
| 10293 | "/// \\param __imm\n" |
| 10294 | "/// An immediate integer specifying the index of the vector element to be\n" |
| 10295 | "/// replaced.\n" |
| 10296 | "/// \\returns A copy of vector \\a __a, after replacing its element indexed by\n" |
| 10297 | "/// \\a __imm with \\a __b.\n" |
| 10298 | "#define _mm256_insert_epi16(X, I, N) \\\n" |
| 10299 | " (__m256i)__builtin_ia32_vec_set_v16hi((__v16hi)(__m256i)(X), \\\n" |
| 10300 | " (int)(I), (int)(N))\n" |
| 10301 | "\n" |
| 10302 | "/// Takes a [32 x i8] vector and replaces the vector element value\n" |
| 10303 | "/// indexed by the immediate constant operand with a new value. Returns the\n" |
| 10304 | "/// modified vector.\n" |
| 10305 | "///\n" |
| 10306 | "/// \\headerfile <x86intrin.h>\n" |
| 10307 | "///\n" |
| 10308 | "/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>\n" |
| 10309 | "/// instruction.\n" |
| 10310 | "///\n" |
| 10311 | "/// \\param __a\n" |
| 10312 | "/// A vector of [32 x i8] to be used by the insert operation.\n" |
| 10313 | "/// \\param __b\n" |
| 10314 | "/// An i8 integer value. The replacement value for the insert operation.\n" |
| 10315 | "/// \\param __imm\n" |
| 10316 | "/// An immediate integer specifying the index of the vector element to be\n" |
| 10317 | "/// replaced.\n" |
| 10318 | "/// \\returns A copy of vector \\a __a, after replacing its element indexed by\n" |
| 10319 | "/// \\a __imm with \\a __b.\n" |
| 10320 | "#define _mm256_insert_epi8(X, I, N) \\\n" |
| 10321 | " (__m256i)__builtin_ia32_vec_set_v32qi((__v32qi)(__m256i)(X), \\\n" |
| 10322 | " (int)(I), (int)(N))\n" |
| 10323 | "\n" |
| 10324 | "#ifdef __x86_64__\n" |
| 10325 | "/// Takes a [4 x i64] vector and replaces the vector element value\n" |
| 10326 | "/// indexed by the immediate constant operand with a new value. Returns the\n" |
| 10327 | "/// modified vector.\n" |
| 10328 | "///\n" |
| 10329 | "/// \\headerfile <x86intrin.h>\n" |
| 10330 | "///\n" |
| 10331 | "/// This intrinsic corresponds to the <c> VINSERTF128+COMPOSITE </c>\n" |
| 10332 | "/// instruction.\n" |
| 10333 | "///\n" |
| 10334 | "/// \\param __a\n" |
| 10335 | "/// A vector of [4 x i64] to be used by the insert operation.\n" |
| 10336 | "/// \\param __b\n" |
| 10337 | "/// A 64-bit integer value. The replacement value for the insert operation.\n" |
| 10338 | "/// \\param __imm\n" |
| 10339 | "/// An immediate integer specifying the index of the vector element to be\n" |
| 10340 | "/// replaced.\n" |
| 10341 | "/// \\returns A copy of vector \\a __a, after replacing its element indexed by\n" |
| 10342 | "/// \\a __imm with \\a __b.\n" |
| 10343 | "#define _mm256_insert_epi64(X, I, N) \\\n" |
| 10344 | " (__m256i)__builtin_ia32_vec_set_v4di((__v4di)(__m256i)(X), \\\n" |
| 10345 | " (long long)(I), (int)(N))\n" |
| 10346 | "#endif\n" |
| 10347 | "\n" |
| 10348 | "/* Conversion */\n" |
| 10349 | "/// Converts a vector of [4 x i32] into a vector of [4 x double].\n" |
| 10350 | "///\n" |
| 10351 | "/// \\headerfile <x86intrin.h>\n" |
| 10352 | "///\n" |
| 10353 | "/// This intrinsic corresponds to the <c> VCVTDQ2PD </c> instruction.\n" |
| 10354 | "///\n" |
| 10355 | "/// \\param __a\n" |
| 10356 | "/// A 128-bit integer vector of [4 x i32].\n" |
| 10357 | "/// \\returns A 256-bit vector of [4 x double] containing the converted values.\n" |
| 10358 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 10359 | "_mm256_cvtepi32_pd(__m128i __a)\n" |
| 10360 | "{\n" |
| 10361 | " return (__m256d)__builtin_convertvector((__v4si)__a, __v4df);\n" |
| 10362 | "}\n" |
| 10363 | "\n" |
| 10364 | "/// Converts a vector of [8 x i32] into a vector of [8 x float].\n" |
| 10365 | "///\n" |
| 10366 | "/// \\headerfile <x86intrin.h>\n" |
| 10367 | "///\n" |
| 10368 | "/// This intrinsic corresponds to the <c> VCVTDQ2PS </c> instruction.\n" |
| 10369 | "///\n" |
| 10370 | "/// \\param __a\n" |
| 10371 | "/// A 256-bit integer vector.\n" |
| 10372 | "/// \\returns A 256-bit vector of [8 x float] containing the converted values.\n" |
| 10373 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 10374 | "_mm256_cvtepi32_ps(__m256i __a)\n" |
| 10375 | "{\n" |
| 10376 | " return (__m256)__builtin_convertvector((__v8si)__a, __v8sf);\n" |
| 10377 | "}\n" |
| 10378 | "\n" |
| 10379 | "/// Converts a 256-bit vector of [4 x double] into a 128-bit vector of\n" |
| 10380 | "/// [4 x float].\n" |
| 10381 | "///\n" |
| 10382 | "/// \\headerfile <x86intrin.h>\n" |
| 10383 | "///\n" |
| 10384 | "/// This intrinsic corresponds to the <c> VCVTPD2PS </c> instruction.\n" |
| 10385 | "///\n" |
| 10386 | "/// \\param __a\n" |
| 10387 | "/// A 256-bit vector of [4 x double].\n" |
| 10388 | "/// \\returns A 128-bit vector of [4 x float] containing the converted values.\n" |
| 10389 | "static __inline __m128 __DEFAULT_FN_ATTRS\n" |
| 10390 | "_mm256_cvtpd_ps(__m256d __a)\n" |
| 10391 | "{\n" |
| 10392 | " return (__m128)__builtin_ia32_cvtpd2ps256((__v4df) __a);\n" |
| 10393 | "}\n" |
| 10394 | "\n" |
| 10395 | "/// Converts a vector of [8 x float] into a vector of [8 x i32].\n" |
| 10396 | "///\n" |
| 10397 | "/// \\headerfile <x86intrin.h>\n" |
| 10398 | "///\n" |
| 10399 | "/// This intrinsic corresponds to the <c> VCVTPS2DQ </c> instruction.\n" |
| 10400 | "///\n" |
| 10401 | "/// \\param __a\n" |
| 10402 | "/// A 256-bit vector of [8 x float].\n" |
| 10403 | "/// \\returns A 256-bit integer vector containing the converted values.\n" |
| 10404 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
| 10405 | "_mm256_cvtps_epi32(__m256 __a)\n" |
| 10406 | "{\n" |
| 10407 | " return (__m256i)__builtin_ia32_cvtps2dq256((__v8sf) __a);\n" |
| 10408 | "}\n" |
| 10409 | "\n" |
| 10410 | "/// Converts a 128-bit vector of [4 x float] into a 256-bit vector of [4\n" |
| 10411 | "/// x double].\n" |
| 10412 | "///\n" |
| 10413 | "/// \\headerfile <x86intrin.h>\n" |
| 10414 | "///\n" |
| 10415 | "/// This intrinsic corresponds to the <c> VCVTPS2PD </c> instruction.\n" |
| 10416 | "///\n" |
| 10417 | "/// \\param __a\n" |
| 10418 | "/// A 128-bit vector of [4 x float].\n" |
| 10419 | "/// \\returns A 256-bit vector of [4 x double] containing the converted values.\n" |
| 10420 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 10421 | "_mm256_cvtps_pd(__m128 __a)\n" |
| 10422 | "{\n" |
| 10423 | " return (__m256d)__builtin_convertvector((__v4sf)__a, __v4df);\n" |
| 10424 | "}\n" |
| 10425 | "\n" |
| 10426 | "/// Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4\n" |
| 10427 | "/// x i32], truncating the result by rounding towards zero when it is\n" |
| 10428 | "/// inexact.\n" |
| 10429 | "///\n" |
| 10430 | "/// \\headerfile <x86intrin.h>\n" |
| 10431 | "///\n" |
| 10432 | "/// This intrinsic corresponds to the <c> VCVTTPD2DQ </c> instruction.\n" |
| 10433 | "///\n" |
| 10434 | "/// \\param __a\n" |
| 10435 | "/// A 256-bit vector of [4 x double].\n" |
| 10436 | "/// \\returns A 128-bit integer vector containing the converted values.\n" |
| 10437 | "static __inline __m128i __DEFAULT_FN_ATTRS\n" |
| 10438 | "_mm256_cvttpd_epi32(__m256d __a)\n" |
| 10439 | "{\n" |
| 10440 | " return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a);\n" |
| 10441 | "}\n" |
| 10442 | "\n" |
| 10443 | "/// Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4\n" |
| 10444 | "/// x i32]. When a conversion is inexact, the value returned is rounded\n" |
| 10445 | "/// according to the rounding control bits in the MXCSR register.\n" |
| 10446 | "///\n" |
| 10447 | "/// \\headerfile <x86intrin.h>\n" |
| 10448 | "///\n" |
| 10449 | "/// This intrinsic corresponds to the <c> VCVTPD2DQ </c> instruction.\n" |
| 10450 | "///\n" |
| 10451 | "/// \\param __a\n" |
| 10452 | "/// A 256-bit vector of [4 x double].\n" |
| 10453 | "/// \\returns A 128-bit integer vector containing the converted values.\n" |
| 10454 | "static __inline __m128i __DEFAULT_FN_ATTRS\n" |
| 10455 | "_mm256_cvtpd_epi32(__m256d __a)\n" |
| 10456 | "{\n" |
| 10457 | " return (__m128i)__builtin_ia32_cvtpd2dq256((__v4df) __a);\n" |
| 10458 | "}\n" |
| 10459 | "\n" |
| 10460 | "/// Converts a vector of [8 x float] into a vector of [8 x i32],\n" |
| 10461 | "/// truncating the result by rounding towards zero when it is inexact.\n" |
| 10462 | "///\n" |
| 10463 | "/// \\headerfile <x86intrin.h>\n" |
| 10464 | "///\n" |
| 10465 | "/// This intrinsic corresponds to the <c> VCVTTPS2DQ </c> instruction.\n" |
| 10466 | "///\n" |
| 10467 | "/// \\param __a\n" |
| 10468 | "/// A 256-bit vector of [8 x float].\n" |
| 10469 | "/// \\returns A 256-bit integer vector containing the converted values.\n" |
| 10470 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
| 10471 | "_mm256_cvttps_epi32(__m256 __a)\n" |
| 10472 | "{\n" |
| 10473 | " return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a);\n" |
| 10474 | "}\n" |
| 10475 | "\n" |
| 10476 | "/// Returns the first element of the input vector of [4 x double].\n" |
| 10477 | "///\n" |
| 10478 | "/// \\headerfile <avxintrin.h>\n" |
| 10479 | "///\n" |
| 10480 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 10481 | "/// instruction.\n" |
| 10482 | "///\n" |
| 10483 | "/// \\param __a\n" |
| 10484 | "/// A 256-bit vector of [4 x double].\n" |
| 10485 | "/// \\returns A 64 bit double containing the first element of the input vector.\n" |
| 10486 | "static __inline double __DEFAULT_FN_ATTRS\n" |
| 10487 | "_mm256_cvtsd_f64(__m256d __a)\n" |
| 10488 | "{\n" |
| 10489 | " return __a[0];\n" |
| 10490 | "}\n" |
| 10491 | "\n" |
| 10492 | "/// Returns the first element of the input vector of [8 x i32].\n" |
| 10493 | "///\n" |
| 10494 | "/// \\headerfile <avxintrin.h>\n" |
| 10495 | "///\n" |
| 10496 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 10497 | "/// instruction.\n" |
| 10498 | "///\n" |
| 10499 | "/// \\param __a\n" |
| 10500 | "/// A 256-bit vector of [8 x i32].\n" |
| 10501 | "/// \\returns A 32 bit integer containing the first element of the input vector.\n" |
| 10502 | "static __inline int __DEFAULT_FN_ATTRS\n" |
| 10503 | "_mm256_cvtsi256_si32(__m256i __a)\n" |
| 10504 | "{\n" |
| 10505 | " __v8si __b = (__v8si)__a;\n" |
| 10506 | " return __b[0];\n" |
| 10507 | "}\n" |
| 10508 | "\n" |
| 10509 | "/// Returns the first element of the input vector of [8 x float].\n" |
| 10510 | "///\n" |
| 10511 | "/// \\headerfile <avxintrin.h>\n" |
| 10512 | "///\n" |
| 10513 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 10514 | "/// instruction.\n" |
| 10515 | "///\n" |
| 10516 | "/// \\param __a\n" |
| 10517 | "/// A 256-bit vector of [8 x float].\n" |
| 10518 | "/// \\returns A 32 bit float containing the first element of the input vector.\n" |
| 10519 | "static __inline float __DEFAULT_FN_ATTRS\n" |
| 10520 | "_mm256_cvtss_f32(__m256 __a)\n" |
| 10521 | "{\n" |
| 10522 | " return __a[0];\n" |
| 10523 | "}\n" |
| 10524 | "\n" |
| 10525 | "/* Vector replicate */\n" |
| 10526 | "/// Moves and duplicates odd-indexed values from a 256-bit vector of\n" |
| 10527 | "/// [8 x float] to float values in a 256-bit vector of [8 x float].\n" |
| 10528 | "///\n" |
| 10529 | "/// \\headerfile <x86intrin.h>\n" |
| 10530 | "///\n" |
| 10531 | "/// This intrinsic corresponds to the <c> VMOVSHDUP </c> instruction.\n" |
| 10532 | "///\n" |
| 10533 | "/// \\param __a\n" |
| 10534 | "/// A 256-bit vector of [8 x float]. \\n\n" |
| 10535 | "/// Bits [255:224] of \\a __a are written to bits [255:224] and [223:192] of\n" |
| 10536 | "/// the return value. \\n\n" |
| 10537 | "/// Bits [191:160] of \\a __a are written to bits [191:160] and [159:128] of\n" |
| 10538 | "/// the return value. \\n\n" |
| 10539 | "/// Bits [127:96] of \\a __a are written to bits [127:96] and [95:64] of the\n" |
| 10540 | "/// return value. \\n\n" |
| 10541 | "/// Bits [63:32] of \\a __a are written to bits [63:32] and [31:0] of the\n" |
| 10542 | "/// return value.\n" |
| 10543 | "/// \\returns A 256-bit vector of [8 x float] containing the moved and duplicated\n" |
| 10544 | "/// values.\n" |
| 10545 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 10546 | "_mm256_movehdup_ps(__m256 __a)\n" |
| 10547 | "{\n" |
| 10548 | " return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 1, 1, 3, 3, 5, 5, 7, 7);\n" |
| 10549 | "}\n" |
| 10550 | "\n" |
| 10551 | "/// Moves and duplicates even-indexed values from a 256-bit vector of\n" |
| 10552 | "/// [8 x float] to float values in a 256-bit vector of [8 x float].\n" |
| 10553 | "///\n" |
| 10554 | "/// \\headerfile <x86intrin.h>\n" |
| 10555 | "///\n" |
| 10556 | "/// This intrinsic corresponds to the <c> VMOVSLDUP </c> instruction.\n" |
| 10557 | "///\n" |
| 10558 | "/// \\param __a\n" |
| 10559 | "/// A 256-bit vector of [8 x float]. \\n\n" |
| 10560 | "/// Bits [223:192] of \\a __a are written to bits [255:224] and [223:192] of\n" |
| 10561 | "/// the return value. \\n\n" |
| 10562 | "/// Bits [159:128] of \\a __a are written to bits [191:160] and [159:128] of\n" |
| 10563 | "/// the return value. \\n\n" |
| 10564 | "/// Bits [95:64] of \\a __a are written to bits [127:96] and [95:64] of the\n" |
| 10565 | "/// return value. \\n\n" |
| 10566 | "/// Bits [31:0] of \\a __a are written to bits [63:32] and [31:0] of the\n" |
| 10567 | "/// return value.\n" |
| 10568 | "/// \\returns A 256-bit vector of [8 x float] containing the moved and duplicated\n" |
| 10569 | "/// values.\n" |
| 10570 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 10571 | "_mm256_moveldup_ps(__m256 __a)\n" |
| 10572 | "{\n" |
| 10573 | " return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 0, 2, 2, 4, 4, 6, 6);\n" |
| 10574 | "}\n" |
| 10575 | "\n" |
| 10576 | "/// Moves and duplicates double-precision floating point values from a\n" |
| 10577 | "/// 256-bit vector of [4 x double] to double-precision values in a 256-bit\n" |
| 10578 | "/// vector of [4 x double].\n" |
| 10579 | "///\n" |
| 10580 | "/// \\headerfile <x86intrin.h>\n" |
| 10581 | "///\n" |
| 10582 | "/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.\n" |
| 10583 | "///\n" |
| 10584 | "/// \\param __a\n" |
| 10585 | "/// A 256-bit vector of [4 x double]. \\n\n" |
| 10586 | "/// Bits [63:0] of \\a __a are written to bits [127:64] and [63:0] of the\n" |
| 10587 | "/// return value. \\n\n" |
| 10588 | "/// Bits [191:128] of \\a __a are written to bits [255:192] and [191:128] of\n" |
| 10589 | "/// the return value.\n" |
| 10590 | "/// \\returns A 256-bit vector of [4 x double] containing the moved and\n" |
| 10591 | "/// duplicated values.\n" |
| 10592 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 10593 | "_mm256_movedup_pd(__m256d __a)\n" |
| 10594 | "{\n" |
| 10595 | " return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 0, 2, 2);\n" |
| 10596 | "}\n" |
| 10597 | "\n" |
| 10598 | "/* Unpack and Interleave */\n" |
| 10599 | "/// Unpacks the odd-indexed vector elements from two 256-bit vectors of\n" |
| 10600 | "/// [4 x double] and interleaves them into a 256-bit vector of [4 x double].\n" |
| 10601 | "///\n" |
| 10602 | "/// \\headerfile <x86intrin.h>\n" |
| 10603 | "///\n" |
| 10604 | "/// This intrinsic corresponds to the <c> VUNPCKHPD </c> instruction.\n" |
| 10605 | "///\n" |
| 10606 | "/// \\param __a\n" |
| 10607 | "/// A 256-bit floating-point vector of [4 x double]. \\n\n" |
| 10608 | "/// Bits [127:64] are written to bits [63:0] of the return value. \\n\n" |
| 10609 | "/// Bits [255:192] are written to bits [191:128] of the return value. \\n\n" |
| 10610 | "/// \\param __b\n" |
| 10611 | "/// A 256-bit floating-point vector of [4 x double]. \\n\n" |
| 10612 | "/// Bits [127:64] are written to bits [127:64] of the return value. \\n\n" |
| 10613 | "/// Bits [255:192] are written to bits [255:192] of the return value. \\n\n" |
| 10614 | "/// \\returns A 256-bit vector of [4 x double] containing the interleaved values.\n" |
| 10615 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 10616 | "_mm256_unpackhi_pd(__m256d __a, __m256d __b)\n" |
| 10617 | "{\n" |
| 10618 | " return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 1, 5, 1+2, 5+2);\n" |
| 10619 | "}\n" |
| 10620 | "\n" |
| 10621 | "/// Unpacks the even-indexed vector elements from two 256-bit vectors of\n" |
| 10622 | "/// [4 x double] and interleaves them into a 256-bit vector of [4 x double].\n" |
| 10623 | "///\n" |
| 10624 | "/// \\headerfile <x86intrin.h>\n" |
| 10625 | "///\n" |
| 10626 | "/// This intrinsic corresponds to the <c> VUNPCKLPD </c> instruction.\n" |
| 10627 | "///\n" |
| 10628 | "/// \\param __a\n" |
| 10629 | "/// A 256-bit floating-point vector of [4 x double]. \\n\n" |
| 10630 | "/// Bits [63:0] are written to bits [63:0] of the return value. \\n\n" |
| 10631 | "/// Bits [191:128] are written to bits [191:128] of the return value.\n" |
| 10632 | "/// \\param __b\n" |
| 10633 | "/// A 256-bit floating-point vector of [4 x double]. \\n\n" |
| 10634 | "/// Bits [63:0] are written to bits [127:64] of the return value. \\n\n" |
| 10635 | "/// Bits [191:128] are written to bits [255:192] of the return value. \\n\n" |
| 10636 | "/// \\returns A 256-bit vector of [4 x double] containing the interleaved values.\n" |
| 10637 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 10638 | "_mm256_unpacklo_pd(__m256d __a, __m256d __b)\n" |
| 10639 | "{\n" |
| 10640 | " return __builtin_shufflevector((__v4df)__a, (__v4df)__b, 0, 4, 0+2, 4+2);\n" |
| 10641 | "}\n" |
| 10642 | "\n" |
| 10643 | "/// Unpacks the 32-bit vector elements 2, 3, 6 and 7 from each of the\n" |
| 10644 | "/// two 256-bit vectors of [8 x float] and interleaves them into a 256-bit\n" |
| 10645 | "/// vector of [8 x float].\n" |
| 10646 | "///\n" |
| 10647 | "/// \\headerfile <x86intrin.h>\n" |
| 10648 | "///\n" |
| 10649 | "/// This intrinsic corresponds to the <c> VUNPCKHPS </c> instruction.\n" |
| 10650 | "///\n" |
| 10651 | "/// \\param __a\n" |
| 10652 | "/// A 256-bit vector of [8 x float]. \\n\n" |
| 10653 | "/// Bits [95:64] are written to bits [31:0] of the return value. \\n\n" |
| 10654 | "/// Bits [127:96] are written to bits [95:64] of the return value. \\n\n" |
| 10655 | "/// Bits [223:192] are written to bits [159:128] of the return value. \\n\n" |
| 10656 | "/// Bits [255:224] are written to bits [223:192] of the return value.\n" |
| 10657 | "/// \\param __b\n" |
| 10658 | "/// A 256-bit vector of [8 x float]. \\n\n" |
| 10659 | "/// Bits [95:64] are written to bits [63:32] of the return value. \\n\n" |
| 10660 | "/// Bits [127:96] are written to bits [127:96] of the return value. \\n\n" |
| 10661 | "/// Bits [223:192] are written to bits [191:160] of the return value. \\n\n" |
| 10662 | "/// Bits [255:224] are written to bits [255:224] of the return value.\n" |
| 10663 | "/// \\returns A 256-bit vector of [8 x float] containing the interleaved values.\n" |
| 10664 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 10665 | "_mm256_unpackhi_ps(__m256 __a, __m256 __b)\n" |
| 10666 | "{\n" |
| 10667 | " return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 2, 10, 2+1, 10+1, 6, 14, 6+1, 14+1);\n" |
| 10668 | "}\n" |
| 10669 | "\n" |
| 10670 | "/// Unpacks the 32-bit vector elements 0, 1, 4 and 5 from each of the\n" |
| 10671 | "/// two 256-bit vectors of [8 x float] and interleaves them into a 256-bit\n" |
| 10672 | "/// vector of [8 x float].\n" |
| 10673 | "///\n" |
| 10674 | "/// \\headerfile <x86intrin.h>\n" |
| 10675 | "///\n" |
| 10676 | "/// This intrinsic corresponds to the <c> VUNPCKLPS </c> instruction.\n" |
| 10677 | "///\n" |
| 10678 | "/// \\param __a\n" |
| 10679 | "/// A 256-bit vector of [8 x float]. \\n\n" |
| 10680 | "/// Bits [31:0] are written to bits [31:0] of the return value. \\n\n" |
| 10681 | "/// Bits [63:32] are written to bits [95:64] of the return value. \\n\n" |
| 10682 | "/// Bits [159:128] are written to bits [159:128] of the return value. \\n\n" |
| 10683 | "/// Bits [191:160] are written to bits [223:192] of the return value.\n" |
| 10684 | "/// \\param __b\n" |
| 10685 | "/// A 256-bit vector of [8 x float]. \\n\n" |
| 10686 | "/// Bits [31:0] are written to bits [63:32] of the return value. \\n\n" |
| 10687 | "/// Bits [63:32] are written to bits [127:96] of the return value. \\n\n" |
| 10688 | "/// Bits [159:128] are written to bits [191:160] of the return value. \\n\n" |
| 10689 | "/// Bits [191:160] are written to bits [255:224] of the return value.\n" |
| 10690 | "/// \\returns A 256-bit vector of [8 x float] containing the interleaved values.\n" |
| 10691 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 10692 | "_mm256_unpacklo_ps(__m256 __a, __m256 __b)\n" |
| 10693 | "{\n" |
| 10694 | " return __builtin_shufflevector((__v8sf)__a, (__v8sf)__b, 0, 8, 0+1, 8+1, 4, 12, 4+1, 12+1);\n" |
| 10695 | "}\n" |
| 10696 | "\n" |
| 10697 | "/* Bit Test */\n" |
| 10698 | "/// Given two 128-bit floating-point vectors of [2 x double], perform an\n" |
| 10699 | "/// element-by-element comparison of the double-precision element in the\n" |
| 10700 | "/// first source vector and the corresponding element in the second source\n" |
| 10701 | "/// vector.\n" |
| 10702 | "///\n" |
| 10703 | "/// The EFLAGS register is updated as follows: \\n\n" |
| 10704 | "/// If there is at least one pair of double-precision elements where the\n" |
| 10705 | "/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n" |
| 10706 | "/// ZF flag is set to 1. \\n\n" |
| 10707 | "/// If there is at least one pair of double-precision elements where the\n" |
| 10708 | "/// sign-bit of the first element is 0 and the sign-bit of the second element\n" |
| 10709 | "/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n" |
| 10710 | "/// This intrinsic returns the value of the ZF flag.\n" |
| 10711 | "///\n" |
| 10712 | "/// \\headerfile <x86intrin.h>\n" |
| 10713 | "///\n" |
| 10714 | "/// This intrinsic corresponds to the <c> VTESTPD </c> instruction.\n" |
| 10715 | "///\n" |
| 10716 | "/// \\param __a\n" |
| 10717 | "/// A 128-bit vector of [2 x double].\n" |
| 10718 | "/// \\param __b\n" |
| 10719 | "/// A 128-bit vector of [2 x double].\n" |
| 10720 | "/// \\returns the ZF flag in the EFLAGS register.\n" |
| 10721 | "static __inline int __DEFAULT_FN_ATTRS128\n" |
| 10722 | "_mm_testz_pd(__m128d __a, __m128d __b)\n" |
| 10723 | "{\n" |
| 10724 | " return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b);\n" |
| 10725 | "}\n" |
| 10726 | "\n" |
| 10727 | "/// Given two 128-bit floating-point vectors of [2 x double], perform an\n" |
| 10728 | "/// element-by-element comparison of the double-precision element in the\n" |
| 10729 | "/// first source vector and the corresponding element in the second source\n" |
| 10730 | "/// vector.\n" |
| 10731 | "///\n" |
| 10732 | "/// The EFLAGS register is updated as follows: \\n\n" |
| 10733 | "/// If there is at least one pair of double-precision elements where the\n" |
| 10734 | "/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n" |
| 10735 | "/// ZF flag is set to 1. \\n\n" |
| 10736 | "/// If there is at least one pair of double-precision elements where the\n" |
| 10737 | "/// sign-bit of the first element is 0 and the sign-bit of the second element\n" |
| 10738 | "/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n" |
| 10739 | "/// This intrinsic returns the value of the CF flag.\n" |
| 10740 | "///\n" |
| 10741 | "/// \\headerfile <x86intrin.h>\n" |
| 10742 | "///\n" |
| 10743 | "/// This intrinsic corresponds to the <c> VTESTPD </c> instruction.\n" |
| 10744 | "///\n" |
| 10745 | "/// \\param __a\n" |
| 10746 | "/// A 128-bit vector of [2 x double].\n" |
| 10747 | "/// \\param __b\n" |
| 10748 | "/// A 128-bit vector of [2 x double].\n" |
| 10749 | "/// \\returns the CF flag in the EFLAGS register.\n" |
| 10750 | "static __inline int __DEFAULT_FN_ATTRS128\n" |
| 10751 | "_mm_testc_pd(__m128d __a, __m128d __b)\n" |
| 10752 | "{\n" |
| 10753 | " return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b);\n" |
| 10754 | "}\n" |
| 10755 | "\n" |
| 10756 | "/// Given two 128-bit floating-point vectors of [2 x double], perform an\n" |
| 10757 | "/// element-by-element comparison of the double-precision element in the\n" |
| 10758 | "/// first source vector and the corresponding element in the second source\n" |
| 10759 | "/// vector.\n" |
| 10760 | "///\n" |
| 10761 | "/// The EFLAGS register is updated as follows: \\n\n" |
| 10762 | "/// If there is at least one pair of double-precision elements where the\n" |
| 10763 | "/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n" |
| 10764 | "/// ZF flag is set to 1. \\n\n" |
| 10765 | "/// If there is at least one pair of double-precision elements where the\n" |
| 10766 | "/// sign-bit of the first element is 0 and the sign-bit of the second element\n" |
| 10767 | "/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n" |
| 10768 | "/// This intrinsic returns 1 if both the ZF and CF flags are set to 0,\n" |
| 10769 | "/// otherwise it returns 0.\n" |
| 10770 | "///\n" |
| 10771 | "/// \\headerfile <x86intrin.h>\n" |
| 10772 | "///\n" |
| 10773 | "/// This intrinsic corresponds to the <c> VTESTPD </c> instruction.\n" |
| 10774 | "///\n" |
| 10775 | "/// \\param __a\n" |
| 10776 | "/// A 128-bit vector of [2 x double].\n" |
| 10777 | "/// \\param __b\n" |
| 10778 | "/// A 128-bit vector of [2 x double].\n" |
| 10779 | "/// \\returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.\n" |
| 10780 | "static __inline int __DEFAULT_FN_ATTRS128\n" |
| 10781 | "_mm_testnzc_pd(__m128d __a, __m128d __b)\n" |
| 10782 | "{\n" |
| 10783 | " return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b);\n" |
| 10784 | "}\n" |
| 10785 | "\n" |
| 10786 | "/// Given two 128-bit floating-point vectors of [4 x float], perform an\n" |
| 10787 | "/// element-by-element comparison of the single-precision element in the\n" |
| 10788 | "/// first source vector and the corresponding element in the second source\n" |
| 10789 | "/// vector.\n" |
| 10790 | "///\n" |
| 10791 | "/// The EFLAGS register is updated as follows: \\n\n" |
| 10792 | "/// If there is at least one pair of single-precision elements where the\n" |
| 10793 | "/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n" |
| 10794 | "/// ZF flag is set to 1. \\n\n" |
| 10795 | "/// If there is at least one pair of single-precision elements where the\n" |
| 10796 | "/// sign-bit of the first element is 0 and the sign-bit of the second element\n" |
| 10797 | "/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n" |
| 10798 | "/// This intrinsic returns the value of the ZF flag.\n" |
| 10799 | "///\n" |
| 10800 | "/// \\headerfile <x86intrin.h>\n" |
| 10801 | "///\n" |
| 10802 | "/// This intrinsic corresponds to the <c> VTESTPS </c> instruction.\n" |
| 10803 | "///\n" |
| 10804 | "/// \\param __a\n" |
| 10805 | "/// A 128-bit vector of [4 x float].\n" |
| 10806 | "/// \\param __b\n" |
| 10807 | "/// A 128-bit vector of [4 x float].\n" |
| 10808 | "/// \\returns the ZF flag.\n" |
| 10809 | "static __inline int __DEFAULT_FN_ATTRS128\n" |
| 10810 | "_mm_testz_ps(__m128 __a, __m128 __b)\n" |
| 10811 | "{\n" |
| 10812 | " return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b);\n" |
| 10813 | "}\n" |
| 10814 | "\n" |
| 10815 | "/// Given two 128-bit floating-point vectors of [4 x float], perform an\n" |
| 10816 | "/// element-by-element comparison of the single-precision element in the\n" |
| 10817 | "/// first source vector and the corresponding element in the second source\n" |
| 10818 | "/// vector.\n" |
| 10819 | "///\n" |
| 10820 | "/// The EFLAGS register is updated as follows: \\n\n" |
| 10821 | "/// If there is at least one pair of single-precision elements where the\n" |
| 10822 | "/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n" |
| 10823 | "/// ZF flag is set to 1. \\n\n" |
| 10824 | "/// If there is at least one pair of single-precision elements where the\n" |
| 10825 | "/// sign-bit of the first element is 0 and the sign-bit of the second element\n" |
| 10826 | "/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n" |
| 10827 | "/// This intrinsic returns the value of the CF flag.\n" |
| 10828 | "///\n" |
| 10829 | "/// \\headerfile <x86intrin.h>\n" |
| 10830 | "///\n" |
| 10831 | "/// This intrinsic corresponds to the <c> VTESTPS </c> instruction.\n" |
| 10832 | "///\n" |
| 10833 | "/// \\param __a\n" |
| 10834 | "/// A 128-bit vector of [4 x float].\n" |
| 10835 | "/// \\param __b\n" |
| 10836 | "/// A 128-bit vector of [4 x float].\n" |
| 10837 | "/// \\returns the CF flag.\n" |
| 10838 | "static __inline int __DEFAULT_FN_ATTRS128\n" |
| 10839 | "_mm_testc_ps(__m128 __a, __m128 __b)\n" |
| 10840 | "{\n" |
| 10841 | " return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b);\n" |
| 10842 | "}\n" |
| 10843 | "\n" |
| 10844 | "/// Given two 128-bit floating-point vectors of [4 x float], perform an\n" |
| 10845 | "/// element-by-element comparison of the single-precision element in the\n" |
| 10846 | "/// first source vector and the corresponding element in the second source\n" |
| 10847 | "/// vector.\n" |
| 10848 | "///\n" |
| 10849 | "/// The EFLAGS register is updated as follows: \\n\n" |
| 10850 | "/// If there is at least one pair of single-precision elements where the\n" |
| 10851 | "/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n" |
| 10852 | "/// ZF flag is set to 1. \\n\n" |
| 10853 | "/// If there is at least one pair of single-precision elements where the\n" |
| 10854 | "/// sign-bit of the first element is 0 and the sign-bit of the second element\n" |
| 10855 | "/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n" |
| 10856 | "/// This intrinsic returns 1 if both the ZF and CF flags are set to 0,\n" |
| 10857 | "/// otherwise it returns 0.\n" |
| 10858 | "///\n" |
| 10859 | "/// \\headerfile <x86intrin.h>\n" |
| 10860 | "///\n" |
| 10861 | "/// This intrinsic corresponds to the <c> VTESTPS </c> instruction.\n" |
| 10862 | "///\n" |
| 10863 | "/// \\param __a\n" |
| 10864 | "/// A 128-bit vector of [4 x float].\n" |
| 10865 | "/// \\param __b\n" |
| 10866 | "/// A 128-bit vector of [4 x float].\n" |
| 10867 | "/// \\returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.\n" |
| 10868 | "static __inline int __DEFAULT_FN_ATTRS128\n" |
| 10869 | "_mm_testnzc_ps(__m128 __a, __m128 __b)\n" |
| 10870 | "{\n" |
| 10871 | " return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b);\n" |
| 10872 | "}\n" |
| 10873 | "\n" |
| 10874 | "/// Given two 256-bit floating-point vectors of [4 x double], perform an\n" |
| 10875 | "/// element-by-element comparison of the double-precision elements in the\n" |
| 10876 | "/// first source vector and the corresponding elements in the second source\n" |
| 10877 | "/// vector.\n" |
| 10878 | "///\n" |
| 10879 | "/// The EFLAGS register is updated as follows: \\n\n" |
| 10880 | "/// If there is at least one pair of double-precision elements where the\n" |
| 10881 | "/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n" |
| 10882 | "/// ZF flag is set to 1. \\n\n" |
| 10883 | "/// If there is at least one pair of double-precision elements where the\n" |
| 10884 | "/// sign-bit of the first element is 0 and the sign-bit of the second element\n" |
| 10885 | "/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n" |
| 10886 | "/// This intrinsic returns the value of the ZF flag.\n" |
| 10887 | "///\n" |
| 10888 | "/// \\headerfile <x86intrin.h>\n" |
| 10889 | "///\n" |
| 10890 | "/// This intrinsic corresponds to the <c> VTESTPD </c> instruction.\n" |
| 10891 | "///\n" |
| 10892 | "/// \\param __a\n" |
| 10893 | "/// A 256-bit vector of [4 x double].\n" |
| 10894 | "/// \\param __b\n" |
| 10895 | "/// A 256-bit vector of [4 x double].\n" |
| 10896 | "/// \\returns the ZF flag.\n" |
| 10897 | "static __inline int __DEFAULT_FN_ATTRS\n" |
| 10898 | "_mm256_testz_pd(__m256d __a, __m256d __b)\n" |
| 10899 | "{\n" |
| 10900 | " return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b);\n" |
| 10901 | "}\n" |
| 10902 | "\n" |
| 10903 | "/// Given two 256-bit floating-point vectors of [4 x double], perform an\n" |
| 10904 | "/// element-by-element comparison of the double-precision elements in the\n" |
| 10905 | "/// first source vector and the corresponding elements in the second source\n" |
| 10906 | "/// vector.\n" |
| 10907 | "///\n" |
| 10908 | "/// The EFLAGS register is updated as follows: \\n\n" |
| 10909 | "/// If there is at least one pair of double-precision elements where the\n" |
| 10910 | "/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n" |
| 10911 | "/// ZF flag is set to 1. \\n\n" |
| 10912 | "/// If there is at least one pair of double-precision elements where the\n" |
| 10913 | "/// sign-bit of the first element is 0 and the sign-bit of the second element\n" |
| 10914 | "/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n" |
| 10915 | "/// This intrinsic returns the value of the CF flag.\n" |
| 10916 | "///\n" |
| 10917 | "/// \\headerfile <x86intrin.h>\n" |
| 10918 | "///\n" |
| 10919 | "/// This intrinsic corresponds to the <c> VTESTPD </c> instruction.\n" |
| 10920 | "///\n" |
| 10921 | "/// \\param __a\n" |
| 10922 | "/// A 256-bit vector of [4 x double].\n" |
| 10923 | "/// \\param __b\n" |
| 10924 | "/// A 256-bit vector of [4 x double].\n" |
| 10925 | "/// \\returns the CF flag.\n" |
| 10926 | "static __inline int __DEFAULT_FN_ATTRS\n" |
| 10927 | "_mm256_testc_pd(__m256d __a, __m256d __b)\n" |
| 10928 | "{\n" |
| 10929 | " return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b);\n" |
| 10930 | "}\n" |
| 10931 | "\n" |
| 10932 | "/// Given two 256-bit floating-point vectors of [4 x double], perform an\n" |
| 10933 | "/// element-by-element comparison of the double-precision elements in the\n" |
| 10934 | "/// first source vector and the corresponding elements in the second source\n" |
| 10935 | "/// vector.\n" |
| 10936 | "///\n" |
| 10937 | "/// The EFLAGS register is updated as follows: \\n\n" |
| 10938 | "/// If there is at least one pair of double-precision elements where the\n" |
| 10939 | "/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n" |
| 10940 | "/// ZF flag is set to 1. \\n\n" |
| 10941 | "/// If there is at least one pair of double-precision elements where the\n" |
| 10942 | "/// sign-bit of the first element is 0 and the sign-bit of the second element\n" |
| 10943 | "/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n" |
| 10944 | "/// This intrinsic returns 1 if both the ZF and CF flags are set to 0,\n" |
| 10945 | "/// otherwise it returns 0.\n" |
| 10946 | "///\n" |
| 10947 | "/// \\headerfile <x86intrin.h>\n" |
| 10948 | "///\n" |
| 10949 | "/// This intrinsic corresponds to the <c> VTESTPD </c> instruction.\n" |
| 10950 | "///\n" |
| 10951 | "/// \\param __a\n" |
| 10952 | "/// A 256-bit vector of [4 x double].\n" |
| 10953 | "/// \\param __b\n" |
| 10954 | "/// A 256-bit vector of [4 x double].\n" |
| 10955 | "/// \\returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.\n" |
| 10956 | "static __inline int __DEFAULT_FN_ATTRS\n" |
| 10957 | "_mm256_testnzc_pd(__m256d __a, __m256d __b)\n" |
| 10958 | "{\n" |
| 10959 | " return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b);\n" |
| 10960 | "}\n" |
| 10961 | "\n" |
| 10962 | "/// Given two 256-bit floating-point vectors of [8 x float], perform an\n" |
| 10963 | "/// element-by-element comparison of the single-precision element in the\n" |
| 10964 | "/// first source vector and the corresponding element in the second source\n" |
| 10965 | "/// vector.\n" |
| 10966 | "///\n" |
| 10967 | "/// The EFLAGS register is updated as follows: \\n\n" |
| 10968 | "/// If there is at least one pair of single-precision elements where the\n" |
| 10969 | "/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n" |
| 10970 | "/// ZF flag is set to 1. \\n\n" |
| 10971 | "/// If there is at least one pair of single-precision elements where the\n" |
| 10972 | "/// sign-bit of the first element is 0 and the sign-bit of the second element\n" |
| 10973 | "/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n" |
| 10974 | "/// This intrinsic returns the value of the ZF flag.\n" |
| 10975 | "///\n" |
| 10976 | "/// \\headerfile <x86intrin.h>\n" |
| 10977 | "///\n" |
| 10978 | "/// This intrinsic corresponds to the <c> VTESTPS </c> instruction.\n" |
| 10979 | "///\n" |
| 10980 | "/// \\param __a\n" |
| 10981 | "/// A 256-bit vector of [8 x float].\n" |
| 10982 | "/// \\param __b\n" |
| 10983 | "/// A 256-bit vector of [8 x float].\n" |
| 10984 | "/// \\returns the ZF flag.\n" |
| 10985 | "static __inline int __DEFAULT_FN_ATTRS\n" |
| 10986 | "_mm256_testz_ps(__m256 __a, __m256 __b)\n" |
| 10987 | "{\n" |
| 10988 | " return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b);\n" |
| 10989 | "}\n" |
| 10990 | "\n" |
| 10991 | "/// Given two 256-bit floating-point vectors of [8 x float], perform an\n" |
| 10992 | "/// element-by-element comparison of the single-precision element in the\n" |
| 10993 | "/// first source vector and the corresponding element in the second source\n" |
| 10994 | "/// vector.\n" |
| 10995 | "///\n" |
| 10996 | "/// The EFLAGS register is updated as follows: \\n\n" |
| 10997 | "/// If there is at least one pair of single-precision elements where the\n" |
| 10998 | "/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n" |
| 10999 | "/// ZF flag is set to 1. \\n\n" |
| 11000 | "/// If there is at least one pair of single-precision elements where the\n" |
| 11001 | "/// sign-bit of the first element is 0 and the sign-bit of the second element\n" |
| 11002 | "/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n" |
| 11003 | "/// This intrinsic returns the value of the CF flag.\n" |
| 11004 | "///\n" |
| 11005 | "/// \\headerfile <x86intrin.h>\n" |
| 11006 | "///\n" |
| 11007 | "/// This intrinsic corresponds to the <c> VTESTPS </c> instruction.\n" |
| 11008 | "///\n" |
| 11009 | "/// \\param __a\n" |
| 11010 | "/// A 256-bit vector of [8 x float].\n" |
| 11011 | "/// \\param __b\n" |
| 11012 | "/// A 256-bit vector of [8 x float].\n" |
| 11013 | "/// \\returns the CF flag.\n" |
| 11014 | "static __inline int __DEFAULT_FN_ATTRS\n" |
| 11015 | "_mm256_testc_ps(__m256 __a, __m256 __b)\n" |
| 11016 | "{\n" |
| 11017 | " return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b);\n" |
| 11018 | "}\n" |
| 11019 | "\n" |
| 11020 | "/// Given two 256-bit floating-point vectors of [8 x float], perform an\n" |
| 11021 | "/// element-by-element comparison of the single-precision elements in the\n" |
| 11022 | "/// first source vector and the corresponding elements in the second source\n" |
| 11023 | "/// vector.\n" |
| 11024 | "///\n" |
| 11025 | "/// The EFLAGS register is updated as follows: \\n\n" |
| 11026 | "/// If there is at least one pair of single-precision elements where the\n" |
| 11027 | "/// sign-bits of both elements are 1, the ZF flag is set to 0. Otherwise the\n" |
| 11028 | "/// ZF flag is set to 1. \\n\n" |
| 11029 | "/// If there is at least one pair of single-precision elements where the\n" |
| 11030 | "/// sign-bit of the first element is 0 and the sign-bit of the second element\n" |
| 11031 | "/// is 1, the CF flag is set to 0. Otherwise the CF flag is set to 1. \\n\n" |
| 11032 | "/// This intrinsic returns 1 if both the ZF and CF flags are set to 0,\n" |
| 11033 | "/// otherwise it returns 0.\n" |
| 11034 | "///\n" |
| 11035 | "/// \\headerfile <x86intrin.h>\n" |
| 11036 | "///\n" |
| 11037 | "/// This intrinsic corresponds to the <c> VTESTPS </c> instruction.\n" |
| 11038 | "///\n" |
| 11039 | "/// \\param __a\n" |
| 11040 | "/// A 256-bit vector of [8 x float].\n" |
| 11041 | "/// \\param __b\n" |
| 11042 | "/// A 256-bit vector of [8 x float].\n" |
| 11043 | "/// \\returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.\n" |
| 11044 | "static __inline int __DEFAULT_FN_ATTRS\n" |
| 11045 | "_mm256_testnzc_ps(__m256 __a, __m256 __b)\n" |
| 11046 | "{\n" |
| 11047 | " return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b);\n" |
| 11048 | "}\n" |
| 11049 | "\n" |
| 11050 | "/// Given two 256-bit integer vectors, perform a bit-by-bit comparison\n" |
| 11051 | "/// of the two source vectors.\n" |
| 11052 | "///\n" |
| 11053 | "/// The EFLAGS register is updated as follows: \\n\n" |
| 11054 | "/// If there is at least one pair of bits where both bits are 1, the ZF flag\n" |
| 11055 | "/// is set to 0. Otherwise the ZF flag is set to 1. \\n\n" |
| 11056 | "/// If there is at least one pair of bits where the bit from the first source\n" |
| 11057 | "/// vector is 0 and the bit from the second source vector is 1, the CF flag\n" |
| 11058 | "/// is set to 0. Otherwise the CF flag is set to 1. \\n\n" |
| 11059 | "/// This intrinsic returns the value of the ZF flag.\n" |
| 11060 | "///\n" |
| 11061 | "/// \\headerfile <x86intrin.h>\n" |
| 11062 | "///\n" |
| 11063 | "/// This intrinsic corresponds to the <c> VPTEST </c> instruction.\n" |
| 11064 | "///\n" |
| 11065 | "/// \\param __a\n" |
| 11066 | "/// A 256-bit integer vector.\n" |
| 11067 | "/// \\param __b\n" |
| 11068 | "/// A 256-bit integer vector.\n" |
| 11069 | "/// \\returns the ZF flag.\n" |
| 11070 | "static __inline int __DEFAULT_FN_ATTRS\n" |
| 11071 | "_mm256_testz_si256(__m256i __a, __m256i __b)\n" |
| 11072 | "{\n" |
| 11073 | " return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b);\n" |
| 11074 | "}\n" |
| 11075 | "\n" |
| 11076 | "/// Given two 256-bit integer vectors, perform a bit-by-bit comparison\n" |
| 11077 | "/// of the two source vectors.\n" |
| 11078 | "///\n" |
| 11079 | "/// The EFLAGS register is updated as follows: \\n\n" |
| 11080 | "/// If there is at least one pair of bits where both bits are 1, the ZF flag\n" |
| 11081 | "/// is set to 0. Otherwise the ZF flag is set to 1. \\n\n" |
| 11082 | "/// If there is at least one pair of bits where the bit from the first source\n" |
| 11083 | "/// vector is 0 and the bit from the second source vector is 1, the CF flag\n" |
| 11084 | "/// is set to 0. Otherwise the CF flag is set to 1. \\n\n" |
| 11085 | "/// This intrinsic returns the value of the CF flag.\n" |
| 11086 | "///\n" |
| 11087 | "/// \\headerfile <x86intrin.h>\n" |
| 11088 | "///\n" |
| 11089 | "/// This intrinsic corresponds to the <c> VPTEST </c> instruction.\n" |
| 11090 | "///\n" |
| 11091 | "/// \\param __a\n" |
| 11092 | "/// A 256-bit integer vector.\n" |
| 11093 | "/// \\param __b\n" |
| 11094 | "/// A 256-bit integer vector.\n" |
| 11095 | "/// \\returns the CF flag.\n" |
| 11096 | "static __inline int __DEFAULT_FN_ATTRS\n" |
| 11097 | "_mm256_testc_si256(__m256i __a, __m256i __b)\n" |
| 11098 | "{\n" |
| 11099 | " return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b);\n" |
| 11100 | "}\n" |
| 11101 | "\n" |
| 11102 | "/// Given two 256-bit integer vectors, perform a bit-by-bit comparison\n" |
| 11103 | "/// of the two source vectors.\n" |
| 11104 | "///\n" |
| 11105 | "/// The EFLAGS register is updated as follows: \\n\n" |
| 11106 | "/// If there is at least one pair of bits where both bits are 1, the ZF flag\n" |
| 11107 | "/// is set to 0. Otherwise the ZF flag is set to 1. \\n\n" |
| 11108 | "/// If there is at least one pair of bits where the bit from the first source\n" |
| 11109 | "/// vector is 0 and the bit from the second source vector is 1, the CF flag\n" |
| 11110 | "/// is set to 0. Otherwise the CF flag is set to 1. \\n\n" |
| 11111 | "/// This intrinsic returns 1 if both the ZF and CF flags are set to 0,\n" |
| 11112 | "/// otherwise it returns 0.\n" |
| 11113 | "///\n" |
| 11114 | "/// \\headerfile <x86intrin.h>\n" |
| 11115 | "///\n" |
| 11116 | "/// This intrinsic corresponds to the <c> VPTEST </c> instruction.\n" |
| 11117 | "///\n" |
| 11118 | "/// \\param __a\n" |
| 11119 | "/// A 256-bit integer vector.\n" |
| 11120 | "/// \\param __b\n" |
| 11121 | "/// A 256-bit integer vector.\n" |
| 11122 | "/// \\returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.\n" |
| 11123 | "static __inline int __DEFAULT_FN_ATTRS\n" |
| 11124 | "_mm256_testnzc_si256(__m256i __a, __m256i __b)\n" |
| 11125 | "{\n" |
| 11126 | " return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b);\n" |
| 11127 | "}\n" |
| 11128 | "\n" |
| 11129 | "/* Vector extract sign mask */\n" |
| 11130 | "/// Extracts the sign bits of double-precision floating point elements\n" |
| 11131 | "/// in a 256-bit vector of [4 x double] and writes them to the lower order\n" |
| 11132 | "/// bits of the return value.\n" |
| 11133 | "///\n" |
| 11134 | "/// \\headerfile <x86intrin.h>\n" |
| 11135 | "///\n" |
| 11136 | "/// This intrinsic corresponds to the <c> VMOVMSKPD </c> instruction.\n" |
| 11137 | "///\n" |
| 11138 | "/// \\param __a\n" |
| 11139 | "/// A 256-bit vector of [4 x double] containing the double-precision\n" |
| 11140 | "/// floating point values with sign bits to be extracted.\n" |
| 11141 | "/// \\returns The sign bits from the operand, written to bits [3:0].\n" |
| 11142 | "static __inline int __DEFAULT_FN_ATTRS\n" |
| 11143 | "_mm256_movemask_pd(__m256d __a)\n" |
| 11144 | "{\n" |
| 11145 | " return __builtin_ia32_movmskpd256((__v4df)__a);\n" |
| 11146 | "}\n" |
| 11147 | "\n" |
| 11148 | "/// Extracts the sign bits of single-precision floating point elements\n" |
| 11149 | "/// in a 256-bit vector of [8 x float] and writes them to the lower order\n" |
| 11150 | "/// bits of the return value.\n" |
| 11151 | "///\n" |
| 11152 | "/// \\headerfile <x86intrin.h>\n" |
| 11153 | "///\n" |
| 11154 | "/// This intrinsic corresponds to the <c> VMOVMSKPS </c> instruction.\n" |
| 11155 | "///\n" |
| 11156 | "/// \\param __a\n" |
| 11157 | "/// A 256-bit vector of [8 x float] containing the single-precision floating\n" |
| 11158 | "/// point values with sign bits to be extracted.\n" |
| 11159 | "/// \\returns The sign bits from the operand, written to bits [7:0].\n" |
| 11160 | "static __inline int __DEFAULT_FN_ATTRS\n" |
| 11161 | "_mm256_movemask_ps(__m256 __a)\n" |
| 11162 | "{\n" |
| 11163 | " return __builtin_ia32_movmskps256((__v8sf)__a);\n" |
| 11164 | "}\n" |
| 11165 | "\n" |
| 11166 | "/* Vector __zero */\n" |
| 11167 | "/// Zeroes the contents of all XMM or YMM registers.\n" |
| 11168 | "///\n" |
| 11169 | "/// \\headerfile <x86intrin.h>\n" |
| 11170 | "///\n" |
| 11171 | "/// This intrinsic corresponds to the <c> VZEROALL </c> instruction.\n" |
| 11172 | "static __inline void __attribute__((__always_inline__, __nodebug__, __target__(\"avx\")))\n" |
| 11173 | "_mm256_zeroall(void)\n" |
| 11174 | "{\n" |
| 11175 | " __builtin_ia32_vzeroall();\n" |
| 11176 | "}\n" |
| 11177 | "\n" |
| 11178 | "/// Zeroes the upper 128 bits (bits 255:128) of all YMM registers.\n" |
| 11179 | "///\n" |
| 11180 | "/// \\headerfile <x86intrin.h>\n" |
| 11181 | "///\n" |
| 11182 | "/// This intrinsic corresponds to the <c> VZEROUPPER </c> instruction.\n" |
| 11183 | "static __inline void __attribute__((__always_inline__, __nodebug__, __target__(\"avx\")))\n" |
| 11184 | "_mm256_zeroupper(void)\n" |
| 11185 | "{\n" |
| 11186 | " __builtin_ia32_vzeroupper();\n" |
| 11187 | "}\n" |
| 11188 | "\n" |
| 11189 | "/* Vector load with broadcast */\n" |
| 11190 | "/// Loads a scalar single-precision floating point value from the\n" |
| 11191 | "/// specified address pointed to by \\a __a and broadcasts it to the elements\n" |
| 11192 | "/// of a [4 x float] vector.\n" |
| 11193 | "///\n" |
| 11194 | "/// \\headerfile <x86intrin.h>\n" |
| 11195 | "///\n" |
| 11196 | "/// This intrinsic corresponds to the <c> VBROADCASTSS </c> instruction.\n" |
| 11197 | "///\n" |
| 11198 | "/// \\param __a\n" |
| 11199 | "/// The single-precision floating point value to be broadcast.\n" |
| 11200 | "/// \\returns A 128-bit vector of [4 x float] whose 32-bit elements are set\n" |
| 11201 | "/// equal to the broadcast value.\n" |
| 11202 | "static __inline __m128 __DEFAULT_FN_ATTRS128\n" |
| 11203 | "_mm_broadcast_ss(float const *__a)\n" |
| 11204 | "{\n" |
| 11205 | " float __f = *__a;\n" |
| 11206 | " return __extension__ (__m128)(__v4sf){ __f, __f, __f, __f };\n" |
| 11207 | "}\n" |
| 11208 | "\n" |
| 11209 | "/// Loads a scalar double-precision floating point value from the\n" |
| 11210 | "/// specified address pointed to by \\a __a and broadcasts it to the elements\n" |
| 11211 | "/// of a [4 x double] vector.\n" |
| 11212 | "///\n" |
| 11213 | "/// \\headerfile <x86intrin.h>\n" |
| 11214 | "///\n" |
| 11215 | "/// This intrinsic corresponds to the <c> VBROADCASTSD </c> instruction.\n" |
| 11216 | "///\n" |
| 11217 | "/// \\param __a\n" |
| 11218 | "/// The double-precision floating point value to be broadcast.\n" |
| 11219 | "/// \\returns A 256-bit vector of [4 x double] whose 64-bit elements are set\n" |
| 11220 | "/// equal to the broadcast value.\n" |
| 11221 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 11222 | "_mm256_broadcast_sd(double const *__a)\n" |
| 11223 | "{\n" |
| 11224 | " double __d = *__a;\n" |
| 11225 | " return __extension__ (__m256d)(__v4df){ __d, __d, __d, __d };\n" |
| 11226 | "}\n" |
| 11227 | "\n" |
| 11228 | "/// Loads a scalar single-precision floating point value from the\n" |
| 11229 | "/// specified address pointed to by \\a __a and broadcasts it to the elements\n" |
| 11230 | "/// of a [8 x float] vector.\n" |
| 11231 | "///\n" |
| 11232 | "/// \\headerfile <x86intrin.h>\n" |
| 11233 | "///\n" |
| 11234 | "/// This intrinsic corresponds to the <c> VBROADCASTSS </c> instruction.\n" |
| 11235 | "///\n" |
| 11236 | "/// \\param __a\n" |
| 11237 | "/// The single-precision floating point value to be broadcast.\n" |
| 11238 | "/// \\returns A 256-bit vector of [8 x float] whose 32-bit elements are set\n" |
| 11239 | "/// equal to the broadcast value.\n" |
| 11240 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 11241 | "_mm256_broadcast_ss(float const *__a)\n" |
| 11242 | "{\n" |
| 11243 | " float __f = *__a;\n" |
| 11244 | " return __extension__ (__m256)(__v8sf){ __f, __f, __f, __f, __f, __f, __f, __f };\n" |
| 11245 | "}\n" |
| 11246 | "\n" |
| 11247 | "/// Loads the data from a 128-bit vector of [2 x double] from the\n" |
| 11248 | "/// specified address pointed to by \\a __a and broadcasts it to 128-bit\n" |
| 11249 | "/// elements in a 256-bit vector of [4 x double].\n" |
| 11250 | "///\n" |
| 11251 | "/// \\headerfile <x86intrin.h>\n" |
| 11252 | "///\n" |
| 11253 | "/// This intrinsic corresponds to the <c> VBROADCASTF128 </c> instruction.\n" |
| 11254 | "///\n" |
| 11255 | "/// \\param __a\n" |
| 11256 | "/// The 128-bit vector of [2 x double] to be broadcast.\n" |
| 11257 | "/// \\returns A 256-bit vector of [4 x double] whose 128-bit elements are set\n" |
| 11258 | "/// equal to the broadcast value.\n" |
| 11259 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 11260 | "_mm256_broadcast_pd(__m128d const *__a)\n" |
| 11261 | "{\n" |
| 11262 | " __m128d __b = _mm_loadu_pd((const double *)__a);\n" |
| 11263 | " return (__m256d)__builtin_shufflevector((__v2df)__b, (__v2df)__b,\n" |
| 11264 | " 0, 1, 0, 1);\n" |
| 11265 | "}\n" |
| 11266 | "\n" |
| 11267 | "/// Loads the data from a 128-bit vector of [4 x float] from the\n" |
| 11268 | "/// specified address pointed to by \\a __a and broadcasts it to 128-bit\n" |
| 11269 | "/// elements in a 256-bit vector of [8 x float].\n" |
| 11270 | "///\n" |
| 11271 | "/// \\headerfile <x86intrin.h>\n" |
| 11272 | "///\n" |
| 11273 | "/// This intrinsic corresponds to the <c> VBROADCASTF128 </c> instruction.\n" |
| 11274 | "///\n" |
| 11275 | "/// \\param __a\n" |
| 11276 | "/// The 128-bit vector of [4 x float] to be broadcast.\n" |
| 11277 | "/// \\returns A 256-bit vector of [8 x float] whose 128-bit elements are set\n" |
| 11278 | "/// equal to the broadcast value.\n" |
| 11279 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 11280 | "_mm256_broadcast_ps(__m128 const *__a)\n" |
| 11281 | "{\n" |
| 11282 | " __m128 __b = _mm_loadu_ps((const float *)__a);\n" |
| 11283 | " return (__m256)__builtin_shufflevector((__v4sf)__b, (__v4sf)__b,\n" |
| 11284 | " 0, 1, 2, 3, 0, 1, 2, 3);\n" |
| 11285 | "}\n" |
| 11286 | "\n" |
| 11287 | "/* SIMD load ops */\n" |
| 11288 | "/// Loads 4 double-precision floating point values from a 32-byte aligned\n" |
| 11289 | "/// memory location pointed to by \\a __p into a vector of [4 x double].\n" |
| 11290 | "///\n" |
| 11291 | "/// \\headerfile <x86intrin.h>\n" |
| 11292 | "///\n" |
| 11293 | "/// This intrinsic corresponds to the <c> VMOVAPD </c> instruction.\n" |
| 11294 | "///\n" |
| 11295 | "/// \\param __p\n" |
| 11296 | "/// A 32-byte aligned pointer to a memory location containing\n" |
| 11297 | "/// double-precision floating point values.\n" |
| 11298 | "/// \\returns A 256-bit vector of [4 x double] containing the moved values.\n" |
| 11299 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 11300 | "_mm256_load_pd(double const *__p)\n" |
| 11301 | "{\n" |
| 11302 | " return *(__m256d *)__p;\n" |
| 11303 | "}\n" |
| 11304 | "\n" |
| 11305 | "/// Loads 8 single-precision floating point values from a 32-byte aligned\n" |
| 11306 | "/// memory location pointed to by \\a __p into a vector of [8 x float].\n" |
| 11307 | "///\n" |
| 11308 | "/// \\headerfile <x86intrin.h>\n" |
| 11309 | "///\n" |
| 11310 | "/// This intrinsic corresponds to the <c> VMOVAPS </c> instruction.\n" |
| 11311 | "///\n" |
| 11312 | "/// \\param __p\n" |
| 11313 | "/// A 32-byte aligned pointer to a memory location containing float values.\n" |
| 11314 | "/// \\returns A 256-bit vector of [8 x float] containing the moved values.\n" |
| 11315 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 11316 | "_mm256_load_ps(float const *__p)\n" |
| 11317 | "{\n" |
| 11318 | " return *(__m256 *)__p;\n" |
| 11319 | "}\n" |
| 11320 | "\n" |
| 11321 | "/// Loads 4 double-precision floating point values from an unaligned\n" |
| 11322 | "/// memory location pointed to by \\a __p into a vector of [4 x double].\n" |
| 11323 | "///\n" |
| 11324 | "/// \\headerfile <x86intrin.h>\n" |
| 11325 | "///\n" |
| 11326 | "/// This intrinsic corresponds to the <c> VMOVUPD </c> instruction.\n" |
| 11327 | "///\n" |
| 11328 | "/// \\param __p\n" |
| 11329 | "/// A pointer to a memory location containing double-precision floating\n" |
| 11330 | "/// point values.\n" |
| 11331 | "/// \\returns A 256-bit vector of [4 x double] containing the moved values.\n" |
| 11332 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 11333 | "_mm256_loadu_pd(double const *__p)\n" |
| 11334 | "{\n" |
| 11335 | " struct __loadu_pd {\n" |
| 11336 | " __m256d __v;\n" |
| 11337 | " } __attribute__((__packed__, __may_alias__));\n" |
| 11338 | " return ((struct __loadu_pd*)__p)->__v;\n" |
| 11339 | "}\n" |
| 11340 | "\n" |
| 11341 | "/// Loads 8 single-precision floating point values from an unaligned\n" |
| 11342 | "/// memory location pointed to by \\a __p into a vector of [8 x float].\n" |
| 11343 | "///\n" |
| 11344 | "/// \\headerfile <x86intrin.h>\n" |
| 11345 | "///\n" |
| 11346 | "/// This intrinsic corresponds to the <c> VMOVUPS </c> instruction.\n" |
| 11347 | "///\n" |
| 11348 | "/// \\param __p\n" |
| 11349 | "/// A pointer to a memory location containing single-precision floating\n" |
| 11350 | "/// point values.\n" |
| 11351 | "/// \\returns A 256-bit vector of [8 x float] containing the moved values.\n" |
| 11352 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 11353 | "_mm256_loadu_ps(float const *__p)\n" |
| 11354 | "{\n" |
| 11355 | " struct __loadu_ps {\n" |
| 11356 | " __m256 __v;\n" |
| 11357 | " } __attribute__((__packed__, __may_alias__));\n" |
| 11358 | " return ((struct __loadu_ps*)__p)->__v;\n" |
| 11359 | "}\n" |
| 11360 | "\n" |
| 11361 | "/// Loads 256 bits of integer data from a 32-byte aligned memory\n" |
| 11362 | "/// location pointed to by \\a __p into elements of a 256-bit integer vector.\n" |
| 11363 | "///\n" |
| 11364 | "/// \\headerfile <x86intrin.h>\n" |
| 11365 | "///\n" |
| 11366 | "/// This intrinsic corresponds to the <c> VMOVDQA </c> instruction.\n" |
| 11367 | "///\n" |
| 11368 | "/// \\param __p\n" |
| 11369 | "/// A 32-byte aligned pointer to a 256-bit integer vector containing integer\n" |
| 11370 | "/// values.\n" |
| 11371 | "/// \\returns A 256-bit integer vector containing the moved values.\n" |
| 11372 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
| 11373 | "_mm256_load_si256(__m256i const *__p)\n" |
| 11374 | "{\n" |
| 11375 | " return *__p;\n" |
| 11376 | "}\n" |
| 11377 | "\n" |
| 11378 | "/// Loads 256 bits of integer data from an unaligned memory location\n" |
| 11379 | "/// pointed to by \\a __p into a 256-bit integer vector.\n" |
| 11380 | "///\n" |
| 11381 | "/// \\headerfile <x86intrin.h>\n" |
| 11382 | "///\n" |
| 11383 | "/// This intrinsic corresponds to the <c> VMOVDQU </c> instruction.\n" |
| 11384 | "///\n" |
| 11385 | "/// \\param __p\n" |
| 11386 | "/// A pointer to a 256-bit integer vector containing integer values.\n" |
| 11387 | "/// \\returns A 256-bit integer vector containing the moved values.\n" |
| 11388 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
| 11389 | "_mm256_loadu_si256(__m256i const *__p)\n" |
| 11390 | "{\n" |
| 11391 | " struct __loadu_si256 {\n" |
| 11392 | " __m256i __v;\n" |
| 11393 | " } __attribute__((__packed__, __may_alias__));\n" |
| 11394 | " return ((struct __loadu_si256*)__p)->__v;\n" |
| 11395 | "}\n" |
| 11396 | "\n" |
| 11397 | "/// Loads 256 bits of integer data from an unaligned memory location\n" |
| 11398 | "/// pointed to by \\a __p into a 256-bit integer vector. This intrinsic may\n" |
| 11399 | "/// perform better than \\c _mm256_loadu_si256 when the data crosses a cache\n" |
| 11400 | "/// line boundary.\n" |
| 11401 | "///\n" |
| 11402 | "/// \\headerfile <x86intrin.h>\n" |
| 11403 | "///\n" |
| 11404 | "/// This intrinsic corresponds to the <c> VLDDQU </c> instruction.\n" |
| 11405 | "///\n" |
| 11406 | "/// \\param __p\n" |
| 11407 | "/// A pointer to a 256-bit integer vector containing integer values.\n" |
| 11408 | "/// \\returns A 256-bit integer vector containing the moved values.\n" |
| 11409 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
| 11410 | "_mm256_lddqu_si256(__m256i const *__p)\n" |
| 11411 | "{\n" |
| 11412 | " return (__m256i)__builtin_ia32_lddqu256((char const *)__p);\n" |
| 11413 | "}\n" |
| 11414 | "\n" |
| 11415 | "/* SIMD store ops */\n" |
| 11416 | "/// Stores double-precision floating point values from a 256-bit vector\n" |
| 11417 | "/// of [4 x double] to a 32-byte aligned memory location pointed to by\n" |
| 11418 | "/// \\a __p.\n" |
| 11419 | "///\n" |
| 11420 | "/// \\headerfile <x86intrin.h>\n" |
| 11421 | "///\n" |
| 11422 | "/// This intrinsic corresponds to the <c> VMOVAPD </c> instruction.\n" |
| 11423 | "///\n" |
| 11424 | "/// \\param __p\n" |
| 11425 | "/// A 32-byte aligned pointer to a memory location that will receive the\n" |
| 11426 | "/// double-precision floaing point values.\n" |
| 11427 | "/// \\param __a\n" |
| 11428 | "/// A 256-bit vector of [4 x double] containing the values to be moved.\n" |
| 11429 | "static __inline void __DEFAULT_FN_ATTRS\n" |
| 11430 | "_mm256_store_pd(double *__p, __m256d __a)\n" |
| 11431 | "{\n" |
| 11432 | " *(__m256d *)__p = __a;\n" |
| 11433 | "}\n" |
| 11434 | "\n" |
| 11435 | "/// Stores single-precision floating point values from a 256-bit vector\n" |
| 11436 | "/// of [8 x float] to a 32-byte aligned memory location pointed to by \\a __p.\n" |
| 11437 | "///\n" |
| 11438 | "/// \\headerfile <x86intrin.h>\n" |
| 11439 | "///\n" |
| 11440 | "/// This intrinsic corresponds to the <c> VMOVAPS </c> instruction.\n" |
| 11441 | "///\n" |
| 11442 | "/// \\param __p\n" |
| 11443 | "/// A 32-byte aligned pointer to a memory location that will receive the\n" |
| 11444 | "/// float values.\n" |
| 11445 | "/// \\param __a\n" |
| 11446 | "/// A 256-bit vector of [8 x float] containing the values to be moved.\n" |
| 11447 | "static __inline void __DEFAULT_FN_ATTRS\n" |
| 11448 | "_mm256_store_ps(float *__p, __m256 __a)\n" |
| 11449 | "{\n" |
| 11450 | " *(__m256 *)__p = __a;\n" |
| 11451 | "}\n" |
| 11452 | "\n" |
| 11453 | "/// Stores double-precision floating point values from a 256-bit vector\n" |
| 11454 | "/// of [4 x double] to an unaligned memory location pointed to by \\a __p.\n" |
| 11455 | "///\n" |
| 11456 | "/// \\headerfile <x86intrin.h>\n" |
| 11457 | "///\n" |
| 11458 | "/// This intrinsic corresponds to the <c> VMOVUPD </c> instruction.\n" |
| 11459 | "///\n" |
| 11460 | "/// \\param __p\n" |
| 11461 | "/// A pointer to a memory location that will receive the double-precision\n" |
| 11462 | "/// floating point values.\n" |
| 11463 | "/// \\param __a\n" |
| 11464 | "/// A 256-bit vector of [4 x double] containing the values to be moved.\n" |
| 11465 | "static __inline void __DEFAULT_FN_ATTRS\n" |
| 11466 | "_mm256_storeu_pd(double *__p, __m256d __a)\n" |
| 11467 | "{\n" |
| 11468 | " struct __storeu_pd {\n" |
| 11469 | " __m256d __v;\n" |
| 11470 | " } __attribute__((__packed__, __may_alias__));\n" |
| 11471 | " ((struct __storeu_pd*)__p)->__v = __a;\n" |
| 11472 | "}\n" |
| 11473 | "\n" |
| 11474 | "/// Stores single-precision floating point values from a 256-bit vector\n" |
| 11475 | "/// of [8 x float] to an unaligned memory location pointed to by \\a __p.\n" |
| 11476 | "///\n" |
| 11477 | "/// \\headerfile <x86intrin.h>\n" |
| 11478 | "///\n" |
| 11479 | "/// This intrinsic corresponds to the <c> VMOVUPS </c> instruction.\n" |
| 11480 | "///\n" |
| 11481 | "/// \\param __p\n" |
| 11482 | "/// A pointer to a memory location that will receive the float values.\n" |
| 11483 | "/// \\param __a\n" |
| 11484 | "/// A 256-bit vector of [8 x float] containing the values to be moved.\n" |
| 11485 | "static __inline void __DEFAULT_FN_ATTRS\n" |
| 11486 | "_mm256_storeu_ps(float *__p, __m256 __a)\n" |
| 11487 | "{\n" |
| 11488 | " struct __storeu_ps {\n" |
| 11489 | " __m256 __v;\n" |
| 11490 | " } __attribute__((__packed__, __may_alias__));\n" |
| 11491 | " ((struct __storeu_ps*)__p)->__v = __a;\n" |
| 11492 | "}\n" |
| 11493 | "\n" |
| 11494 | "/// Stores integer values from a 256-bit integer vector to a 32-byte\n" |
| 11495 | "/// aligned memory location pointed to by \\a __p.\n" |
| 11496 | "///\n" |
| 11497 | "/// \\headerfile <x86intrin.h>\n" |
| 11498 | "///\n" |
| 11499 | "/// This intrinsic corresponds to the <c> VMOVDQA </c> instruction.\n" |
| 11500 | "///\n" |
| 11501 | "/// \\param __p\n" |
| 11502 | "/// A 32-byte aligned pointer to a memory location that will receive the\n" |
| 11503 | "/// integer values.\n" |
| 11504 | "/// \\param __a\n" |
| 11505 | "/// A 256-bit integer vector containing the values to be moved.\n" |
| 11506 | "static __inline void __DEFAULT_FN_ATTRS\n" |
| 11507 | "_mm256_store_si256(__m256i *__p, __m256i __a)\n" |
| 11508 | "{\n" |
| 11509 | " *__p = __a;\n" |
| 11510 | "}\n" |
| 11511 | "\n" |
| 11512 | "/// Stores integer values from a 256-bit integer vector to an unaligned\n" |
| 11513 | "/// memory location pointed to by \\a __p.\n" |
| 11514 | "///\n" |
| 11515 | "/// \\headerfile <x86intrin.h>\n" |
| 11516 | "///\n" |
| 11517 | "/// This intrinsic corresponds to the <c> VMOVDQU </c> instruction.\n" |
| 11518 | "///\n" |
| 11519 | "/// \\param __p\n" |
| 11520 | "/// A pointer to a memory location that will receive the integer values.\n" |
| 11521 | "/// \\param __a\n" |
| 11522 | "/// A 256-bit integer vector containing the values to be moved.\n" |
| 11523 | "static __inline void __DEFAULT_FN_ATTRS\n" |
| 11524 | "_mm256_storeu_si256(__m256i *__p, __m256i __a)\n" |
| 11525 | "{\n" |
| 11526 | " struct __storeu_si256 {\n" |
| 11527 | " __m256i __v;\n" |
| 11528 | " } __attribute__((__packed__, __may_alias__));\n" |
| 11529 | " ((struct __storeu_si256*)__p)->__v = __a;\n" |
| 11530 | "}\n" |
| 11531 | "\n" |
| 11532 | "/* Conditional load ops */\n" |
| 11533 | "/// Conditionally loads double-precision floating point elements from a\n" |
| 11534 | "/// memory location pointed to by \\a __p into a 128-bit vector of\n" |
| 11535 | "/// [2 x double], depending on the mask bits associated with each data\n" |
| 11536 | "/// element.\n" |
| 11537 | "///\n" |
| 11538 | "/// \\headerfile <x86intrin.h>\n" |
| 11539 | "///\n" |
| 11540 | "/// This intrinsic corresponds to the <c> VMASKMOVPD </c> instruction.\n" |
| 11541 | "///\n" |
| 11542 | "/// \\param __p\n" |
| 11543 | "/// A pointer to a memory location that contains the double-precision\n" |
| 11544 | "/// floating point values.\n" |
| 11545 | "/// \\param __m\n" |
| 11546 | "/// A 128-bit integer vector containing the mask. The most significant bit of\n" |
| 11547 | "/// each data element represents the mask bits. If a mask bit is zero, the\n" |
| 11548 | "/// corresponding value in the memory location is not loaded and the\n" |
| 11549 | "/// corresponding field in the return value is set to zero.\n" |
| 11550 | "/// \\returns A 128-bit vector of [2 x double] containing the loaded values.\n" |
| 11551 | "static __inline __m128d __DEFAULT_FN_ATTRS128\n" |
| 11552 | "_mm_maskload_pd(double const *__p, __m128i __m)\n" |
| 11553 | "{\n" |
| 11554 | " return (__m128d)__builtin_ia32_maskloadpd((const __v2df *)__p, (__v2di)__m);\n" |
| 11555 | "}\n" |
| 11556 | "\n" |
| 11557 | "/// Conditionally loads double-precision floating point elements from a\n" |
| 11558 | "/// memory location pointed to by \\a __p into a 256-bit vector of\n" |
| 11559 | "/// [4 x double], depending on the mask bits associated with each data\n" |
| 11560 | "/// element.\n" |
| 11561 | "///\n" |
| 11562 | "/// \\headerfile <x86intrin.h>\n" |
| 11563 | "///\n" |
| 11564 | "/// This intrinsic corresponds to the <c> VMASKMOVPD </c> instruction.\n" |
| 11565 | "///\n" |
| 11566 | "/// \\param __p\n" |
| 11567 | "/// A pointer to a memory location that contains the double-precision\n" |
| 11568 | "/// floating point values.\n" |
| 11569 | "/// \\param __m\n" |
| 11570 | "/// A 256-bit integer vector of [4 x quadword] containing the mask. The most\n" |
| 11571 | "/// significant bit of each quadword element represents the mask bits. If a\n" |
| 11572 | "/// mask bit is zero, the corresponding value in the memory location is not\n" |
| 11573 | "/// loaded and the corresponding field in the return value is set to zero.\n" |
| 11574 | "/// \\returns A 256-bit vector of [4 x double] containing the loaded values.\n" |
| 11575 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 11576 | "_mm256_maskload_pd(double const *__p, __m256i __m)\n" |
| 11577 | "{\n" |
| 11578 | " return (__m256d)__builtin_ia32_maskloadpd256((const __v4df *)__p,\n" |
| 11579 | " (__v4di)__m);\n" |
| 11580 | "}\n" |
| 11581 | "\n" |
| 11582 | "/// Conditionally loads single-precision floating point elements from a\n" |
| 11583 | "/// memory location pointed to by \\a __p into a 128-bit vector of\n" |
| 11584 | "/// [4 x float], depending on the mask bits associated with each data\n" |
| 11585 | "/// element.\n" |
| 11586 | "///\n" |
| 11587 | "/// \\headerfile <x86intrin.h>\n" |
| 11588 | "///\n" |
| 11589 | "/// This intrinsic corresponds to the <c> VMASKMOVPS </c> instruction.\n" |
| 11590 | "///\n" |
| 11591 | "/// \\param __p\n" |
| 11592 | "/// A pointer to a memory location that contains the single-precision\n" |
| 11593 | "/// floating point values.\n" |
| 11594 | "/// \\param __m\n" |
| 11595 | "/// A 128-bit integer vector containing the mask. The most significant bit of\n" |
| 11596 | "/// each data element represents the mask bits. If a mask bit is zero, the\n" |
| 11597 | "/// corresponding value in the memory location is not loaded and the\n" |
| 11598 | "/// corresponding field in the return value is set to zero.\n" |
| 11599 | "/// \\returns A 128-bit vector of [4 x float] containing the loaded values.\n" |
| 11600 | "static __inline __m128 __DEFAULT_FN_ATTRS128\n" |
| 11601 | "_mm_maskload_ps(float const *__p, __m128i __m)\n" |
| 11602 | "{\n" |
| 11603 | " return (__m128)__builtin_ia32_maskloadps((const __v4sf *)__p, (__v4si)__m);\n" |
| 11604 | "}\n" |
| 11605 | "\n" |
| 11606 | "/// Conditionally loads single-precision floating point elements from a\n" |
| 11607 | "/// memory location pointed to by \\a __p into a 256-bit vector of\n" |
| 11608 | "/// [8 x float], depending on the mask bits associated with each data\n" |
| 11609 | "/// element.\n" |
| 11610 | "///\n" |
| 11611 | "/// \\headerfile <x86intrin.h>\n" |
| 11612 | "///\n" |
| 11613 | "/// This intrinsic corresponds to the <c> VMASKMOVPS </c> instruction.\n" |
| 11614 | "///\n" |
| 11615 | "/// \\param __p\n" |
| 11616 | "/// A pointer to a memory location that contains the single-precision\n" |
| 11617 | "/// floating point values.\n" |
| 11618 | "/// \\param __m\n" |
| 11619 | "/// A 256-bit integer vector of [8 x dword] containing the mask. The most\n" |
| 11620 | "/// significant bit of each dword element represents the mask bits. If a mask\n" |
| 11621 | "/// bit is zero, the corresponding value in the memory location is not loaded\n" |
| 11622 | "/// and the corresponding field in the return value is set to zero.\n" |
| 11623 | "/// \\returns A 256-bit vector of [8 x float] containing the loaded values.\n" |
| 11624 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 11625 | "_mm256_maskload_ps(float const *__p, __m256i __m)\n" |
| 11626 | "{\n" |
| 11627 | " return (__m256)__builtin_ia32_maskloadps256((const __v8sf *)__p, (__v8si)__m);\n" |
| 11628 | "}\n" |
| 11629 | "\n" |
| 11630 | "/* Conditional store ops */\n" |
| 11631 | "/// Moves single-precision floating point values from a 256-bit vector\n" |
| 11632 | "/// of [8 x float] to a memory location pointed to by \\a __p, according to\n" |
| 11633 | "/// the specified mask.\n" |
| 11634 | "///\n" |
| 11635 | "/// \\headerfile <x86intrin.h>\n" |
| 11636 | "///\n" |
| 11637 | "/// This intrinsic corresponds to the <c> VMASKMOVPS </c> instruction.\n" |
| 11638 | "///\n" |
| 11639 | "/// \\param __p\n" |
| 11640 | "/// A pointer to a memory location that will receive the float values.\n" |
| 11641 | "/// \\param __m\n" |
| 11642 | "/// A 256-bit integer vector of [8 x dword] containing the mask. The most\n" |
| 11643 | "/// significant bit of each dword element in the mask vector represents the\n" |
| 11644 | "/// mask bits. If a mask bit is zero, the corresponding value from vector\n" |
| 11645 | "/// \\a __a is not stored and the corresponding field in the memory location\n" |
| 11646 | "/// pointed to by \\a __p is not changed.\n" |
| 11647 | "/// \\param __a\n" |
| 11648 | "/// A 256-bit vector of [8 x float] containing the values to be stored.\n" |
| 11649 | "static __inline void __DEFAULT_FN_ATTRS\n" |
| 11650 | "_mm256_maskstore_ps(float *__p, __m256i __m, __m256 __a)\n" |
| 11651 | "{\n" |
| 11652 | " __builtin_ia32_maskstoreps256((__v8sf *)__p, (__v8si)__m, (__v8sf)__a);\n" |
| 11653 | "}\n" |
| 11654 | "\n" |
| 11655 | "/// Moves double-precision values from a 128-bit vector of [2 x double]\n" |
| 11656 | "/// to a memory location pointed to by \\a __p, according to the specified\n" |
| 11657 | "/// mask.\n" |
| 11658 | "///\n" |
| 11659 | "/// \\headerfile <x86intrin.h>\n" |
| 11660 | "///\n" |
| 11661 | "/// This intrinsic corresponds to the <c> VMASKMOVPD </c> instruction.\n" |
| 11662 | "///\n" |
| 11663 | "/// \\param __p\n" |
| 11664 | "/// A pointer to a memory location that will receive the float values.\n" |
| 11665 | "/// \\param __m\n" |
| 11666 | "/// A 128-bit integer vector containing the mask. The most significant bit of\n" |
| 11667 | "/// each field in the mask vector represents the mask bits. If a mask bit is\n" |
| 11668 | "/// zero, the corresponding value from vector \\a __a is not stored and the\n" |
| 11669 | "/// corresponding field in the memory location pointed to by \\a __p is not\n" |
| 11670 | "/// changed.\n" |
| 11671 | "/// \\param __a\n" |
| 11672 | "/// A 128-bit vector of [2 x double] containing the values to be stored.\n" |
| 11673 | "static __inline void __DEFAULT_FN_ATTRS128\n" |
| 11674 | "_mm_maskstore_pd(double *__p, __m128i __m, __m128d __a)\n" |
| 11675 | "{\n" |
| 11676 | " __builtin_ia32_maskstorepd((__v2df *)__p, (__v2di)__m, (__v2df)__a);\n" |
| 11677 | "}\n" |
| 11678 | "\n" |
| 11679 | "/// Moves double-precision values from a 256-bit vector of [4 x double]\n" |
| 11680 | "/// to a memory location pointed to by \\a __p, according to the specified\n" |
| 11681 | "/// mask.\n" |
| 11682 | "///\n" |
| 11683 | "/// \\headerfile <x86intrin.h>\n" |
| 11684 | "///\n" |
| 11685 | "/// This intrinsic corresponds to the <c> VMASKMOVPD </c> instruction.\n" |
| 11686 | "///\n" |
| 11687 | "/// \\param __p\n" |
| 11688 | "/// A pointer to a memory location that will receive the float values.\n" |
| 11689 | "/// \\param __m\n" |
| 11690 | "/// A 256-bit integer vector of [4 x quadword] containing the mask. The most\n" |
| 11691 | "/// significant bit of each quadword element in the mask vector represents\n" |
| 11692 | "/// the mask bits. If a mask bit is zero, the corresponding value from vector\n" |
| 11693 | "/// __a is not stored and the corresponding field in the memory location\n" |
| 11694 | "/// pointed to by \\a __p is not changed.\n" |
| 11695 | "/// \\param __a\n" |
| 11696 | "/// A 256-bit vector of [4 x double] containing the values to be stored.\n" |
| 11697 | "static __inline void __DEFAULT_FN_ATTRS\n" |
| 11698 | "_mm256_maskstore_pd(double *__p, __m256i __m, __m256d __a)\n" |
| 11699 | "{\n" |
| 11700 | " __builtin_ia32_maskstorepd256((__v4df *)__p, (__v4di)__m, (__v4df)__a);\n" |
| 11701 | "}\n" |
| 11702 | "\n" |
| 11703 | "/// Moves single-precision floating point values from a 128-bit vector\n" |
| 11704 | "/// of [4 x float] to a memory location pointed to by \\a __p, according to\n" |
| 11705 | "/// the specified mask.\n" |
| 11706 | "///\n" |
| 11707 | "/// \\headerfile <x86intrin.h>\n" |
| 11708 | "///\n" |
| 11709 | "/// This intrinsic corresponds to the <c> VMASKMOVPS </c> instruction.\n" |
| 11710 | "///\n" |
| 11711 | "/// \\param __p\n" |
| 11712 | "/// A pointer to a memory location that will receive the float values.\n" |
| 11713 | "/// \\param __m\n" |
| 11714 | "/// A 128-bit integer vector containing the mask. The most significant bit of\n" |
| 11715 | "/// each field in the mask vector represents the mask bits. If a mask bit is\n" |
| 11716 | "/// zero, the corresponding value from vector __a is not stored and the\n" |
| 11717 | "/// corresponding field in the memory location pointed to by \\a __p is not\n" |
| 11718 | "/// changed.\n" |
| 11719 | "/// \\param __a\n" |
| 11720 | "/// A 128-bit vector of [4 x float] containing the values to be stored.\n" |
| 11721 | "static __inline void __DEFAULT_FN_ATTRS128\n" |
| 11722 | "_mm_maskstore_ps(float *__p, __m128i __m, __m128 __a)\n" |
| 11723 | "{\n" |
| 11724 | " __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4si)__m, (__v4sf)__a);\n" |
| 11725 | "}\n" |
| 11726 | "\n" |
| 11727 | "/* Cacheability support ops */\n" |
| 11728 | "/// Moves integer data from a 256-bit integer vector to a 32-byte\n" |
| 11729 | "/// aligned memory location. To minimize caching, the data is flagged as\n" |
| 11730 | "/// non-temporal (unlikely to be used again soon).\n" |
| 11731 | "///\n" |
| 11732 | "/// \\headerfile <x86intrin.h>\n" |
| 11733 | "///\n" |
| 11734 | "/// This intrinsic corresponds to the <c> VMOVNTDQ </c> instruction.\n" |
| 11735 | "///\n" |
| 11736 | "/// \\param __a\n" |
| 11737 | "/// A pointer to a 32-byte aligned memory location that will receive the\n" |
| 11738 | "/// integer values.\n" |
| 11739 | "/// \\param __b\n" |
| 11740 | "/// A 256-bit integer vector containing the values to be moved.\n" |
| 11741 | "static __inline void __DEFAULT_FN_ATTRS\n" |
| 11742 | "_mm256_stream_si256(__m256i *__a, __m256i __b)\n" |
| 11743 | "{\n" |
| 11744 | " typedef __v4di __v4di_aligned __attribute__((aligned(32)));\n" |
| 11745 | " __builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a);\n" |
| 11746 | "}\n" |
| 11747 | "\n" |
| 11748 | "/// Moves double-precision values from a 256-bit vector of [4 x double]\n" |
| 11749 | "/// to a 32-byte aligned memory location. To minimize caching, the data is\n" |
| 11750 | "/// flagged as non-temporal (unlikely to be used again soon).\n" |
| 11751 | "///\n" |
| 11752 | "/// \\headerfile <x86intrin.h>\n" |
| 11753 | "///\n" |
| 11754 | "/// This intrinsic corresponds to the <c> VMOVNTPD </c> instruction.\n" |
| 11755 | "///\n" |
| 11756 | "/// \\param __a\n" |
| 11757 | "/// A pointer to a 32-byte aligned memory location that will receive the\n" |
| 11758 | "/// double-precision floating-point values.\n" |
| 11759 | "/// \\param __b\n" |
| 11760 | "/// A 256-bit vector of [4 x double] containing the values to be moved.\n" |
| 11761 | "static __inline void __DEFAULT_FN_ATTRS\n" |
| 11762 | "_mm256_stream_pd(double *__a, __m256d __b)\n" |
| 11763 | "{\n" |
| 11764 | " typedef __v4df __v4df_aligned __attribute__((aligned(32)));\n" |
| 11765 | " __builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a);\n" |
| 11766 | "}\n" |
| 11767 | "\n" |
| 11768 | "/// Moves single-precision floating point values from a 256-bit vector\n" |
| 11769 | "/// of [8 x float] to a 32-byte aligned memory location. To minimize\n" |
| 11770 | "/// caching, the data is flagged as non-temporal (unlikely to be used again\n" |
| 11771 | "/// soon).\n" |
| 11772 | "///\n" |
| 11773 | "/// \\headerfile <x86intrin.h>\n" |
| 11774 | "///\n" |
| 11775 | "/// This intrinsic corresponds to the <c> VMOVNTPS </c> instruction.\n" |
| 11776 | "///\n" |
| 11777 | "/// \\param __p\n" |
| 11778 | "/// A pointer to a 32-byte aligned memory location that will receive the\n" |
| 11779 | "/// single-precision floating point values.\n" |
| 11780 | "/// \\param __a\n" |
| 11781 | "/// A 256-bit vector of [8 x float] containing the values to be moved.\n" |
| 11782 | "static __inline void __DEFAULT_FN_ATTRS\n" |
| 11783 | "_mm256_stream_ps(float *__p, __m256 __a)\n" |
| 11784 | "{\n" |
| 11785 | " typedef __v8sf __v8sf_aligned __attribute__((aligned(32)));\n" |
| 11786 | " __builtin_nontemporal_store((__v8sf_aligned)__a, (__v8sf_aligned*)__p);\n" |
| 11787 | "}\n" |
| 11788 | "\n" |
| 11789 | "/* Create vectors */\n" |
| 11790 | "/// Create a 256-bit vector of [4 x double] with undefined values.\n" |
| 11791 | "///\n" |
| 11792 | "/// \\headerfile <x86intrin.h>\n" |
| 11793 | "///\n" |
| 11794 | "/// This intrinsic has no corresponding instruction.\n" |
| 11795 | "///\n" |
| 11796 | "/// \\returns A 256-bit vector of [4 x double] containing undefined values.\n" |
| 11797 | "static __inline__ __m256d __DEFAULT_FN_ATTRS\n" |
| 11798 | "_mm256_undefined_pd(void)\n" |
| 11799 | "{\n" |
| 11800 | " return (__m256d)__builtin_ia32_undef256();\n" |
| 11801 | "}\n" |
| 11802 | "\n" |
| 11803 | "/// Create a 256-bit vector of [8 x float] with undefined values.\n" |
| 11804 | "///\n" |
| 11805 | "/// \\headerfile <x86intrin.h>\n" |
| 11806 | "///\n" |
| 11807 | "/// This intrinsic has no corresponding instruction.\n" |
| 11808 | "///\n" |
| 11809 | "/// \\returns A 256-bit vector of [8 x float] containing undefined values.\n" |
| 11810 | "static __inline__ __m256 __DEFAULT_FN_ATTRS\n" |
| 11811 | "_mm256_undefined_ps(void)\n" |
| 11812 | "{\n" |
| 11813 | " return (__m256)__builtin_ia32_undef256();\n" |
| 11814 | "}\n" |
| 11815 | "\n" |
| 11816 | "/// Create a 256-bit integer vector with undefined values.\n" |
| 11817 | "///\n" |
| 11818 | "/// \\headerfile <x86intrin.h>\n" |
| 11819 | "///\n" |
| 11820 | "/// This intrinsic has no corresponding instruction.\n" |
| 11821 | "///\n" |
| 11822 | "/// \\returns A 256-bit integer vector containing undefined values.\n" |
| 11823 | "static __inline__ __m256i __DEFAULT_FN_ATTRS\n" |
| 11824 | "_mm256_undefined_si256(void)\n" |
| 11825 | "{\n" |
| 11826 | " return (__m256i)__builtin_ia32_undef256();\n" |
| 11827 | "}\n" |
| 11828 | "\n" |
| 11829 | "/// Constructs a 256-bit floating-point vector of [4 x double]\n" |
| 11830 | "/// initialized with the specified double-precision floating-point values.\n" |
| 11831 | "///\n" |
| 11832 | "/// \\headerfile <x86intrin.h>\n" |
| 11833 | "///\n" |
| 11834 | "/// This intrinsic corresponds to the <c> VUNPCKLPD+VINSERTF128 </c>\n" |
| 11835 | "/// instruction.\n" |
| 11836 | "///\n" |
| 11837 | "/// \\param __a\n" |
| 11838 | "/// A double-precision floating-point value used to initialize bits [255:192]\n" |
| 11839 | "/// of the result.\n" |
| 11840 | "/// \\param __b\n" |
| 11841 | "/// A double-precision floating-point value used to initialize bits [191:128]\n" |
| 11842 | "/// of the result.\n" |
| 11843 | "/// \\param __c\n" |
| 11844 | "/// A double-precision floating-point value used to initialize bits [127:64]\n" |
| 11845 | "/// of the result.\n" |
| 11846 | "/// \\param __d\n" |
| 11847 | "/// A double-precision floating-point value used to initialize bits [63:0]\n" |
| 11848 | "/// of the result.\n" |
| 11849 | "/// \\returns An initialized 256-bit floating-point vector of [4 x double].\n" |
| 11850 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 11851 | "_mm256_set_pd(double __a, double __b, double __c, double __d)\n" |
| 11852 | "{\n" |
| 11853 | " return __extension__ (__m256d){ __d, __c, __b, __a };\n" |
| 11854 | "}\n" |
| 11855 | "\n" |
| 11856 | "/// Constructs a 256-bit floating-point vector of [8 x float] initialized\n" |
| 11857 | "/// with the specified single-precision floating-point values.\n" |
| 11858 | "///\n" |
| 11859 | "/// \\headerfile <x86intrin.h>\n" |
| 11860 | "///\n" |
| 11861 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 11862 | "/// instruction.\n" |
| 11863 | "///\n" |
| 11864 | "/// \\param __a\n" |
| 11865 | "/// A single-precision floating-point value used to initialize bits [255:224]\n" |
| 11866 | "/// of the result.\n" |
| 11867 | "/// \\param __b\n" |
| 11868 | "/// A single-precision floating-point value used to initialize bits [223:192]\n" |
| 11869 | "/// of the result.\n" |
| 11870 | "/// \\param __c\n" |
| 11871 | "/// A single-precision floating-point value used to initialize bits [191:160]\n" |
| 11872 | "/// of the result.\n" |
| 11873 | "/// \\param __d\n" |
| 11874 | "/// A single-precision floating-point value used to initialize bits [159:128]\n" |
| 11875 | "/// of the result.\n" |
| 11876 | "/// \\param __e\n" |
| 11877 | "/// A single-precision floating-point value used to initialize bits [127:96]\n" |
| 11878 | "/// of the result.\n" |
| 11879 | "/// \\param __f\n" |
| 11880 | "/// A single-precision floating-point value used to initialize bits [95:64]\n" |
| 11881 | "/// of the result.\n" |
| 11882 | "/// \\param __g\n" |
| 11883 | "/// A single-precision floating-point value used to initialize bits [63:32]\n" |
| 11884 | "/// of the result.\n" |
| 11885 | "/// \\param __h\n" |
| 11886 | "/// A single-precision floating-point value used to initialize bits [31:0]\n" |
| 11887 | "/// of the result.\n" |
| 11888 | "/// \\returns An initialized 256-bit floating-point vector of [8 x float].\n" |
| 11889 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 11890 | "_mm256_set_ps(float __a, float __b, float __c, float __d,\n" |
| 11891 | " float __e, float __f, float __g, float __h)\n" |
| 11892 | "{\n" |
| 11893 | " return __extension__ (__m256){ __h, __g, __f, __e, __d, __c, __b, __a };\n" |
| 11894 | "}\n" |
| 11895 | "\n" |
| 11896 | "/// Constructs a 256-bit integer vector initialized with the specified\n" |
| 11897 | "/// 32-bit integral values.\n" |
| 11898 | "///\n" |
| 11899 | "/// \\headerfile <x86intrin.h>\n" |
| 11900 | "///\n" |
| 11901 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 11902 | "/// instruction.\n" |
| 11903 | "///\n" |
| 11904 | "/// \\param __i0\n" |
| 11905 | "/// A 32-bit integral value used to initialize bits [255:224] of the result.\n" |
| 11906 | "/// \\param __i1\n" |
| 11907 | "/// A 32-bit integral value used to initialize bits [223:192] of the result.\n" |
| 11908 | "/// \\param __i2\n" |
| 11909 | "/// A 32-bit integral value used to initialize bits [191:160] of the result.\n" |
| 11910 | "/// \\param __i3\n" |
| 11911 | "/// A 32-bit integral value used to initialize bits [159:128] of the result.\n" |
| 11912 | "/// \\param __i4\n" |
| 11913 | "/// A 32-bit integral value used to initialize bits [127:96] of the result.\n" |
| 11914 | "/// \\param __i5\n" |
| 11915 | "/// A 32-bit integral value used to initialize bits [95:64] of the result.\n" |
| 11916 | "/// \\param __i6\n" |
| 11917 | "/// A 32-bit integral value used to initialize bits [63:32] of the result.\n" |
| 11918 | "/// \\param __i7\n" |
| 11919 | "/// A 32-bit integral value used to initialize bits [31:0] of the result.\n" |
| 11920 | "/// \\returns An initialized 256-bit integer vector.\n" |
| 11921 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
| 11922 | "_mm256_set_epi32(int __i0, int __i1, int __i2, int __i3,\n" |
| 11923 | " int __i4, int __i5, int __i6, int __i7)\n" |
| 11924 | "{\n" |
| 11925 | " return __extension__ (__m256i)(__v8si){ __i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0 };\n" |
| 11926 | "}\n" |
| 11927 | "\n" |
| 11928 | "/// Constructs a 256-bit integer vector initialized with the specified\n" |
| 11929 | "/// 16-bit integral values.\n" |
| 11930 | "///\n" |
| 11931 | "/// \\headerfile <x86intrin.h>\n" |
| 11932 | "///\n" |
| 11933 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 11934 | "/// instruction.\n" |
| 11935 | "///\n" |
| 11936 | "/// \\param __w15\n" |
| 11937 | "/// A 16-bit integral value used to initialize bits [255:240] of the result.\n" |
| 11938 | "/// \\param __w14\n" |
| 11939 | "/// A 16-bit integral value used to initialize bits [239:224] of the result.\n" |
| 11940 | "/// \\param __w13\n" |
| 11941 | "/// A 16-bit integral value used to initialize bits [223:208] of the result.\n" |
| 11942 | "/// \\param __w12\n" |
| 11943 | "/// A 16-bit integral value used to initialize bits [207:192] of the result.\n" |
| 11944 | "/// \\param __w11\n" |
| 11945 | "/// A 16-bit integral value used to initialize bits [191:176] of the result.\n" |
| 11946 | "/// \\param __w10\n" |
| 11947 | "/// A 16-bit integral value used to initialize bits [175:160] of the result.\n" |
| 11948 | "/// \\param __w09\n" |
| 11949 | "/// A 16-bit integral value used to initialize bits [159:144] of the result.\n" |
| 11950 | "/// \\param __w08\n" |
| 11951 | "/// A 16-bit integral value used to initialize bits [143:128] of the result.\n" |
| 11952 | "/// \\param __w07\n" |
| 11953 | "/// A 16-bit integral value used to initialize bits [127:112] of the result.\n" |
| 11954 | "/// \\param __w06\n" |
| 11955 | "/// A 16-bit integral value used to initialize bits [111:96] of the result.\n" |
| 11956 | "/// \\param __w05\n" |
| 11957 | "/// A 16-bit integral value used to initialize bits [95:80] of the result.\n" |
| 11958 | "/// \\param __w04\n" |
| 11959 | "/// A 16-bit integral value used to initialize bits [79:64] of the result.\n" |
| 11960 | "/// \\param __w03\n" |
| 11961 | "/// A 16-bit integral value used to initialize bits [63:48] of the result.\n" |
| 11962 | "/// \\param __w02\n" |
| 11963 | "/// A 16-bit integral value used to initialize bits [47:32] of the result.\n" |
| 11964 | "/// \\param __w01\n" |
| 11965 | "/// A 16-bit integral value used to initialize bits [31:16] of the result.\n" |
| 11966 | "/// \\param __w00\n" |
| 11967 | "/// A 16-bit integral value used to initialize bits [15:0] of the result.\n" |
| 11968 | "/// \\returns An initialized 256-bit integer vector.\n" |
| 11969 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
| 11970 | "_mm256_set_epi16(short __w15, short __w14, short __w13, short __w12,\n" |
| 11971 | " short __w11, short __w10, short __w09, short __w08,\n" |
| 11972 | " short __w07, short __w06, short __w05, short __w04,\n" |
| 11973 | " short __w03, short __w02, short __w01, short __w00)\n" |
| 11974 | "{\n" |
| 11975 | " return __extension__ (__m256i)(__v16hi){ __w00, __w01, __w02, __w03, __w04, __w05, __w06,\n" |
| 11976 | " __w07, __w08, __w09, __w10, __w11, __w12, __w13, __w14, __w15 };\n" |
| 11977 | "}\n" |
| 11978 | "\n" |
| 11979 | "/// Constructs a 256-bit integer vector initialized with the specified\n" |
| 11980 | "/// 8-bit integral values.\n" |
| 11981 | "///\n" |
| 11982 | "/// \\headerfile <x86intrin.h>\n" |
| 11983 | "///\n" |
| 11984 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 11985 | "/// instruction.\n" |
| 11986 | "///\n" |
| 11987 | "/// \\param __b31\n" |
| 11988 | "/// An 8-bit integral value used to initialize bits [255:248] of the result.\n" |
| 11989 | "/// \\param __b30\n" |
| 11990 | "/// An 8-bit integral value used to initialize bits [247:240] of the result.\n" |
| 11991 | "/// \\param __b29\n" |
| 11992 | "/// An 8-bit integral value used to initialize bits [239:232] of the result.\n" |
| 11993 | "/// \\param __b28\n" |
| 11994 | "/// An 8-bit integral value used to initialize bits [231:224] of the result.\n" |
| 11995 | "/// \\param __b27\n" |
| 11996 | "/// An 8-bit integral value used to initialize bits [223:216] of the result.\n" |
| 11997 | "/// \\param __b26\n" |
| 11998 | "/// An 8-bit integral value used to initialize bits [215:208] of the result.\n" |
| 11999 | "/// \\param __b25\n" |
| 12000 | "/// An 8-bit integral value used to initialize bits [207:200] of the result.\n" |
| 12001 | "/// \\param __b24\n" |
| 12002 | "/// An 8-bit integral value used to initialize bits [199:192] of the result.\n" |
| 12003 | "/// \\param __b23\n" |
| 12004 | "/// An 8-bit integral value used to initialize bits [191:184] of the result.\n" |
| 12005 | "/// \\param __b22\n" |
| 12006 | "/// An 8-bit integral value used to initialize bits [183:176] of the result.\n" |
| 12007 | "/// \\param __b21\n" |
| 12008 | "/// An 8-bit integral value used to initialize bits [175:168] of the result.\n" |
| 12009 | "/// \\param __b20\n" |
| 12010 | "/// An 8-bit integral value used to initialize bits [167:160] of the result.\n" |
| 12011 | "/// \\param __b19\n" |
| 12012 | "/// An 8-bit integral value used to initialize bits [159:152] of the result.\n" |
| 12013 | "/// \\param __b18\n" |
| 12014 | "/// An 8-bit integral value used to initialize bits [151:144] of the result.\n" |
| 12015 | "/// \\param __b17\n" |
| 12016 | "/// An 8-bit integral value used to initialize bits [143:136] of the result.\n" |
| 12017 | "/// \\param __b16\n" |
| 12018 | "/// An 8-bit integral value used to initialize bits [135:128] of the result.\n" |
| 12019 | "/// \\param __b15\n" |
| 12020 | "/// An 8-bit integral value used to initialize bits [127:120] of the result.\n" |
| 12021 | "/// \\param __b14\n" |
| 12022 | "/// An 8-bit integral value used to initialize bits [119:112] of the result.\n" |
| 12023 | "/// \\param __b13\n" |
| 12024 | "/// An 8-bit integral value used to initialize bits [111:104] of the result.\n" |
| 12025 | "/// \\param __b12\n" |
| 12026 | "/// An 8-bit integral value used to initialize bits [103:96] of the result.\n" |
| 12027 | "/// \\param __b11\n" |
| 12028 | "/// An 8-bit integral value used to initialize bits [95:88] of the result.\n" |
| 12029 | "/// \\param __b10\n" |
| 12030 | "/// An 8-bit integral value used to initialize bits [87:80] of the result.\n" |
| 12031 | "/// \\param __b09\n" |
| 12032 | "/// An 8-bit integral value used to initialize bits [79:72] of the result.\n" |
| 12033 | "/// \\param __b08\n" |
| 12034 | "/// An 8-bit integral value used to initialize bits [71:64] of the result.\n" |
| 12035 | "/// \\param __b07\n" |
| 12036 | "/// An 8-bit integral value used to initialize bits [63:56] of the result.\n" |
| 12037 | "/// \\param __b06\n" |
| 12038 | "/// An 8-bit integral value used to initialize bits [55:48] of the result.\n" |
| 12039 | "/// \\param __b05\n" |
| 12040 | "/// An 8-bit integral value used to initialize bits [47:40] of the result.\n" |
| 12041 | "/// \\param __b04\n" |
| 12042 | "/// An 8-bit integral value used to initialize bits [39:32] of the result.\n" |
| 12043 | "/// \\param __b03\n" |
| 12044 | "/// An 8-bit integral value used to initialize bits [31:24] of the result.\n" |
| 12045 | "/// \\param __b02\n" |
| 12046 | "/// An 8-bit integral value used to initialize bits [23:16] of the result.\n" |
| 12047 | "/// \\param __b01\n" |
| 12048 | "/// An 8-bit integral value used to initialize bits [15:8] of the result.\n" |
| 12049 | "/// \\param __b00\n" |
| 12050 | "/// An 8-bit integral value used to initialize bits [7:0] of the result.\n" |
| 12051 | "/// \\returns An initialized 256-bit integer vector.\n" |
| 12052 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
| 12053 | "_mm256_set_epi8(char __b31, char __b30, char __b29, char __b28,\n" |
| 12054 | " char __b27, char __b26, char __b25, char __b24,\n" |
| 12055 | " char __b23, char __b22, char __b21, char __b20,\n" |
| 12056 | " char __b19, char __b18, char __b17, char __b16,\n" |
| 12057 | " char __b15, char __b14, char __b13, char __b12,\n" |
| 12058 | " char __b11, char __b10, char __b09, char __b08,\n" |
| 12059 | " char __b07, char __b06, char __b05, char __b04,\n" |
| 12060 | " char __b03, char __b02, char __b01, char __b00)\n" |
| 12061 | "{\n" |
| 12062 | " return __extension__ (__m256i)(__v32qi){\n" |
| 12063 | " __b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07,\n" |
| 12064 | " __b08, __b09, __b10, __b11, __b12, __b13, __b14, __b15,\n" |
| 12065 | " __b16, __b17, __b18, __b19, __b20, __b21, __b22, __b23,\n" |
| 12066 | " __b24, __b25, __b26, __b27, __b28, __b29, __b30, __b31\n" |
| 12067 | " };\n" |
| 12068 | "}\n" |
| 12069 | "\n" |
| 12070 | "/// Constructs a 256-bit integer vector initialized with the specified\n" |
| 12071 | "/// 64-bit integral values.\n" |
| 12072 | "///\n" |
| 12073 | "/// \\headerfile <x86intrin.h>\n" |
| 12074 | "///\n" |
| 12075 | "/// This intrinsic corresponds to the <c> VPUNPCKLQDQ+VINSERTF128 </c>\n" |
| 12076 | "/// instruction.\n" |
| 12077 | "///\n" |
| 12078 | "/// \\param __a\n" |
| 12079 | "/// A 64-bit integral value used to initialize bits [255:192] of the result.\n" |
| 12080 | "/// \\param __b\n" |
| 12081 | "/// A 64-bit integral value used to initialize bits [191:128] of the result.\n" |
| 12082 | "/// \\param __c\n" |
| 12083 | "/// A 64-bit integral value used to initialize bits [127:64] of the result.\n" |
| 12084 | "/// \\param __d\n" |
| 12085 | "/// A 64-bit integral value used to initialize bits [63:0] of the result.\n" |
| 12086 | "/// \\returns An initialized 256-bit integer vector.\n" |
| 12087 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
| 12088 | "_mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d)\n" |
| 12089 | "{\n" |
| 12090 | " return __extension__ (__m256i)(__v4di){ __d, __c, __b, __a };\n" |
| 12091 | "}\n" |
| 12092 | "\n" |
| 12093 | "/* Create vectors with elements in reverse order */\n" |
| 12094 | "/// Constructs a 256-bit floating-point vector of [4 x double],\n" |
| 12095 | "/// initialized in reverse order with the specified double-precision\n" |
| 12096 | "/// floating-point values.\n" |
| 12097 | "///\n" |
| 12098 | "/// \\headerfile <x86intrin.h>\n" |
| 12099 | "///\n" |
| 12100 | "/// This intrinsic corresponds to the <c> VUNPCKLPD+VINSERTF128 </c>\n" |
| 12101 | "/// instruction.\n" |
| 12102 | "///\n" |
| 12103 | "/// \\param __a\n" |
| 12104 | "/// A double-precision floating-point value used to initialize bits [63:0]\n" |
| 12105 | "/// of the result.\n" |
| 12106 | "/// \\param __b\n" |
| 12107 | "/// A double-precision floating-point value used to initialize bits [127:64]\n" |
| 12108 | "/// of the result.\n" |
| 12109 | "/// \\param __c\n" |
| 12110 | "/// A double-precision floating-point value used to initialize bits [191:128]\n" |
| 12111 | "/// of the result.\n" |
| 12112 | "/// \\param __d\n" |
| 12113 | "/// A double-precision floating-point value used to initialize bits [255:192]\n" |
| 12114 | "/// of the result.\n" |
| 12115 | "/// \\returns An initialized 256-bit floating-point vector of [4 x double].\n" |
| 12116 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 12117 | "_mm256_setr_pd(double __a, double __b, double __c, double __d)\n" |
| 12118 | "{\n" |
| 12119 | " return _mm256_set_pd(__d, __c, __b, __a);\n" |
| 12120 | "}\n" |
| 12121 | "\n" |
| 12122 | "/// Constructs a 256-bit floating-point vector of [8 x float],\n" |
| 12123 | "/// initialized in reverse order with the specified single-precision\n" |
| 12124 | "/// float-point values.\n" |
| 12125 | "///\n" |
| 12126 | "/// \\headerfile <x86intrin.h>\n" |
| 12127 | "///\n" |
| 12128 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 12129 | "/// instruction.\n" |
| 12130 | "///\n" |
| 12131 | "/// \\param __a\n" |
| 12132 | "/// A single-precision floating-point value used to initialize bits [31:0]\n" |
| 12133 | "/// of the result.\n" |
| 12134 | "/// \\param __b\n" |
| 12135 | "/// A single-precision floating-point value used to initialize bits [63:32]\n" |
| 12136 | "/// of the result.\n" |
| 12137 | "/// \\param __c\n" |
| 12138 | "/// A single-precision floating-point value used to initialize bits [95:64]\n" |
| 12139 | "/// of the result.\n" |
| 12140 | "/// \\param __d\n" |
| 12141 | "/// A single-precision floating-point value used to initialize bits [127:96]\n" |
| 12142 | "/// of the result.\n" |
| 12143 | "/// \\param __e\n" |
| 12144 | "/// A single-precision floating-point value used to initialize bits [159:128]\n" |
| 12145 | "/// of the result.\n" |
| 12146 | "/// \\param __f\n" |
| 12147 | "/// A single-precision floating-point value used to initialize bits [191:160]\n" |
| 12148 | "/// of the result.\n" |
| 12149 | "/// \\param __g\n" |
| 12150 | "/// A single-precision floating-point value used to initialize bits [223:192]\n" |
| 12151 | "/// of the result.\n" |
| 12152 | "/// \\param __h\n" |
| 12153 | "/// A single-precision floating-point value used to initialize bits [255:224]\n" |
| 12154 | "/// of the result.\n" |
| 12155 | "/// \\returns An initialized 256-bit floating-point vector of [8 x float].\n" |
| 12156 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 12157 | "_mm256_setr_ps(float __a, float __b, float __c, float __d,\n" |
| 12158 | " float __e, float __f, float __g, float __h)\n" |
| 12159 | "{\n" |
| 12160 | " return _mm256_set_ps(__h, __g, __f, __e, __d, __c, __b, __a);\n" |
| 12161 | "}\n" |
| 12162 | "\n" |
| 12163 | "/// Constructs a 256-bit integer vector, initialized in reverse order\n" |
| 12164 | "/// with the specified 32-bit integral values.\n" |
| 12165 | "///\n" |
| 12166 | "/// \\headerfile <x86intrin.h>\n" |
| 12167 | "///\n" |
| 12168 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 12169 | "/// instruction.\n" |
| 12170 | "///\n" |
| 12171 | "/// \\param __i0\n" |
| 12172 | "/// A 32-bit integral value used to initialize bits [31:0] of the result.\n" |
| 12173 | "/// \\param __i1\n" |
| 12174 | "/// A 32-bit integral value used to initialize bits [63:32] of the result.\n" |
| 12175 | "/// \\param __i2\n" |
| 12176 | "/// A 32-bit integral value used to initialize bits [95:64] of the result.\n" |
| 12177 | "/// \\param __i3\n" |
| 12178 | "/// A 32-bit integral value used to initialize bits [127:96] of the result.\n" |
| 12179 | "/// \\param __i4\n" |
| 12180 | "/// A 32-bit integral value used to initialize bits [159:128] of the result.\n" |
| 12181 | "/// \\param __i5\n" |
| 12182 | "/// A 32-bit integral value used to initialize bits [191:160] of the result.\n" |
| 12183 | "/// \\param __i6\n" |
| 12184 | "/// A 32-bit integral value used to initialize bits [223:192] of the result.\n" |
| 12185 | "/// \\param __i7\n" |
| 12186 | "/// A 32-bit integral value used to initialize bits [255:224] of the result.\n" |
| 12187 | "/// \\returns An initialized 256-bit integer vector.\n" |
| 12188 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
| 12189 | "_mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3,\n" |
| 12190 | " int __i4, int __i5, int __i6, int __i7)\n" |
| 12191 | "{\n" |
| 12192 | " return _mm256_set_epi32(__i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0);\n" |
| 12193 | "}\n" |
| 12194 | "\n" |
| 12195 | "/// Constructs a 256-bit integer vector, initialized in reverse order\n" |
| 12196 | "/// with the specified 16-bit integral values.\n" |
| 12197 | "///\n" |
| 12198 | "/// \\headerfile <x86intrin.h>\n" |
| 12199 | "///\n" |
| 12200 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 12201 | "/// instruction.\n" |
| 12202 | "///\n" |
| 12203 | "/// \\param __w15\n" |
| 12204 | "/// A 16-bit integral value used to initialize bits [15:0] of the result.\n" |
| 12205 | "/// \\param __w14\n" |
| 12206 | "/// A 16-bit integral value used to initialize bits [31:16] of the result.\n" |
| 12207 | "/// \\param __w13\n" |
| 12208 | "/// A 16-bit integral value used to initialize bits [47:32] of the result.\n" |
| 12209 | "/// \\param __w12\n" |
| 12210 | "/// A 16-bit integral value used to initialize bits [63:48] of the result.\n" |
| 12211 | "/// \\param __w11\n" |
| 12212 | "/// A 16-bit integral value used to initialize bits [79:64] of the result.\n" |
| 12213 | "/// \\param __w10\n" |
| 12214 | "/// A 16-bit integral value used to initialize bits [95:80] of the result.\n" |
| 12215 | "/// \\param __w09\n" |
| 12216 | "/// A 16-bit integral value used to initialize bits [111:96] of the result.\n" |
| 12217 | "/// \\param __w08\n" |
| 12218 | "/// A 16-bit integral value used to initialize bits [127:112] of the result.\n" |
| 12219 | "/// \\param __w07\n" |
| 12220 | "/// A 16-bit integral value used to initialize bits [143:128] of the result.\n" |
| 12221 | "/// \\param __w06\n" |
| 12222 | "/// A 16-bit integral value used to initialize bits [159:144] of the result.\n" |
| 12223 | "/// \\param __w05\n" |
| 12224 | "/// A 16-bit integral value used to initialize bits [175:160] of the result.\n" |
| 12225 | "/// \\param __w04\n" |
| 12226 | "/// A 16-bit integral value used to initialize bits [191:176] of the result.\n" |
| 12227 | "/// \\param __w03\n" |
| 12228 | "/// A 16-bit integral value used to initialize bits [207:192] of the result.\n" |
| 12229 | "/// \\param __w02\n" |
| 12230 | "/// A 16-bit integral value used to initialize bits [223:208] of the result.\n" |
| 12231 | "/// \\param __w01\n" |
| 12232 | "/// A 16-bit integral value used to initialize bits [239:224] of the result.\n" |
| 12233 | "/// \\param __w00\n" |
| 12234 | "/// A 16-bit integral value used to initialize bits [255:240] of the result.\n" |
| 12235 | "/// \\returns An initialized 256-bit integer vector.\n" |
| 12236 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
| 12237 | "_mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12,\n" |
| 12238 | " short __w11, short __w10, short __w09, short __w08,\n" |
| 12239 | " short __w07, short __w06, short __w05, short __w04,\n" |
| 12240 | " short __w03, short __w02, short __w01, short __w00)\n" |
| 12241 | "{\n" |
| 12242 | " return _mm256_set_epi16(__w00, __w01, __w02, __w03,\n" |
| 12243 | " __w04, __w05, __w06, __w07,\n" |
| 12244 | " __w08, __w09, __w10, __w11,\n" |
| 12245 | " __w12, __w13, __w14, __w15);\n" |
| 12246 | "}\n" |
| 12247 | "\n" |
| 12248 | "/// Constructs a 256-bit integer vector, initialized in reverse order\n" |
| 12249 | "/// with the specified 8-bit integral values.\n" |
| 12250 | "///\n" |
| 12251 | "/// \\headerfile <x86intrin.h>\n" |
| 12252 | "///\n" |
| 12253 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 12254 | "/// instruction.\n" |
| 12255 | "///\n" |
| 12256 | "/// \\param __b31\n" |
| 12257 | "/// An 8-bit integral value used to initialize bits [7:0] of the result.\n" |
| 12258 | "/// \\param __b30\n" |
| 12259 | "/// An 8-bit integral value used to initialize bits [15:8] of the result.\n" |
| 12260 | "/// \\param __b29\n" |
| 12261 | "/// An 8-bit integral value used to initialize bits [23:16] of the result.\n" |
| 12262 | "/// \\param __b28\n" |
| 12263 | "/// An 8-bit integral value used to initialize bits [31:24] of the result.\n" |
| 12264 | "/// \\param __b27\n" |
| 12265 | "/// An 8-bit integral value used to initialize bits [39:32] of the result.\n" |
| 12266 | "/// \\param __b26\n" |
| 12267 | "/// An 8-bit integral value used to initialize bits [47:40] of the result.\n" |
| 12268 | "/// \\param __b25\n" |
| 12269 | "/// An 8-bit integral value used to initialize bits [55:48] of the result.\n" |
| 12270 | "/// \\param __b24\n" |
| 12271 | "/// An 8-bit integral value used to initialize bits [63:56] of the result.\n" |
| 12272 | "/// \\param __b23\n" |
| 12273 | "/// An 8-bit integral value used to initialize bits [71:64] of the result.\n" |
| 12274 | "/// \\param __b22\n" |
| 12275 | "/// An 8-bit integral value used to initialize bits [79:72] of the result.\n" |
| 12276 | "/// \\param __b21\n" |
| 12277 | "/// An 8-bit integral value used to initialize bits [87:80] of the result.\n" |
| 12278 | "/// \\param __b20\n" |
| 12279 | "/// An 8-bit integral value used to initialize bits [95:88] of the result.\n" |
| 12280 | "/// \\param __b19\n" |
| 12281 | "/// An 8-bit integral value used to initialize bits [103:96] of the result.\n" |
| 12282 | "/// \\param __b18\n" |
| 12283 | "/// An 8-bit integral value used to initialize bits [111:104] of the result.\n" |
| 12284 | "/// \\param __b17\n" |
| 12285 | "/// An 8-bit integral value used to initialize bits [119:112] of the result.\n" |
| 12286 | "/// \\param __b16\n" |
| 12287 | "/// An 8-bit integral value used to initialize bits [127:120] of the result.\n" |
| 12288 | "/// \\param __b15\n" |
| 12289 | "/// An 8-bit integral value used to initialize bits [135:128] of the result.\n" |
| 12290 | "/// \\param __b14\n" |
| 12291 | "/// An 8-bit integral value used to initialize bits [143:136] of the result.\n" |
| 12292 | "/// \\param __b13\n" |
| 12293 | "/// An 8-bit integral value used to initialize bits [151:144] of the result.\n" |
| 12294 | "/// \\param __b12\n" |
| 12295 | "/// An 8-bit integral value used to initialize bits [159:152] of the result.\n" |
| 12296 | "/// \\param __b11\n" |
| 12297 | "/// An 8-bit integral value used to initialize bits [167:160] of the result.\n" |
| 12298 | "/// \\param __b10\n" |
| 12299 | "/// An 8-bit integral value used to initialize bits [175:168] of the result.\n" |
| 12300 | "/// \\param __b09\n" |
| 12301 | "/// An 8-bit integral value used to initialize bits [183:176] of the result.\n" |
| 12302 | "/// \\param __b08\n" |
| 12303 | "/// An 8-bit integral value used to initialize bits [191:184] of the result.\n" |
| 12304 | "/// \\param __b07\n" |
| 12305 | "/// An 8-bit integral value used to initialize bits [199:192] of the result.\n" |
| 12306 | "/// \\param __b06\n" |
| 12307 | "/// An 8-bit integral value used to initialize bits [207:200] of the result.\n" |
| 12308 | "/// \\param __b05\n" |
| 12309 | "/// An 8-bit integral value used to initialize bits [215:208] of the result.\n" |
| 12310 | "/// \\param __b04\n" |
| 12311 | "/// An 8-bit integral value used to initialize bits [223:216] of the result.\n" |
| 12312 | "/// \\param __b03\n" |
| 12313 | "/// An 8-bit integral value used to initialize bits [231:224] of the result.\n" |
| 12314 | "/// \\param __b02\n" |
| 12315 | "/// An 8-bit integral value used to initialize bits [239:232] of the result.\n" |
| 12316 | "/// \\param __b01\n" |
| 12317 | "/// An 8-bit integral value used to initialize bits [247:240] of the result.\n" |
| 12318 | "/// \\param __b00\n" |
| 12319 | "/// An 8-bit integral value used to initialize bits [255:248] of the result.\n" |
| 12320 | "/// \\returns An initialized 256-bit integer vector.\n" |
| 12321 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
| 12322 | "_mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28,\n" |
| 12323 | " char __b27, char __b26, char __b25, char __b24,\n" |
| 12324 | " char __b23, char __b22, char __b21, char __b20,\n" |
| 12325 | " char __b19, char __b18, char __b17, char __b16,\n" |
| 12326 | " char __b15, char __b14, char __b13, char __b12,\n" |
| 12327 | " char __b11, char __b10, char __b09, char __b08,\n" |
| 12328 | " char __b07, char __b06, char __b05, char __b04,\n" |
| 12329 | " char __b03, char __b02, char __b01, char __b00)\n" |
| 12330 | "{\n" |
| 12331 | " return _mm256_set_epi8(__b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07,\n" |
| 12332 | " __b08, __b09, __b10, __b11, __b12, __b13, __b14, __b15,\n" |
| 12333 | " __b16, __b17, __b18, __b19, __b20, __b21, __b22, __b23,\n" |
| 12334 | " __b24, __b25, __b26, __b27, __b28, __b29, __b30, __b31);\n" |
| 12335 | "}\n" |
| 12336 | "\n" |
| 12337 | "/// Constructs a 256-bit integer vector, initialized in reverse order\n" |
| 12338 | "/// with the specified 64-bit integral values.\n" |
| 12339 | "///\n" |
| 12340 | "/// \\headerfile <x86intrin.h>\n" |
| 12341 | "///\n" |
| 12342 | "/// This intrinsic corresponds to the <c> VPUNPCKLQDQ+VINSERTF128 </c>\n" |
| 12343 | "/// instruction.\n" |
| 12344 | "///\n" |
| 12345 | "/// \\param __a\n" |
| 12346 | "/// A 64-bit integral value used to initialize bits [63:0] of the result.\n" |
| 12347 | "/// \\param __b\n" |
| 12348 | "/// A 64-bit integral value used to initialize bits [127:64] of the result.\n" |
| 12349 | "/// \\param __c\n" |
| 12350 | "/// A 64-bit integral value used to initialize bits [191:128] of the result.\n" |
| 12351 | "/// \\param __d\n" |
| 12352 | "/// A 64-bit integral value used to initialize bits [255:192] of the result.\n" |
| 12353 | "/// \\returns An initialized 256-bit integer vector.\n" |
| 12354 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
| 12355 | "_mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d)\n" |
| 12356 | "{\n" |
| 12357 | " return _mm256_set_epi64x(__d, __c, __b, __a);\n" |
| 12358 | "}\n" |
| 12359 | "\n" |
| 12360 | "/* Create vectors with repeated elements */\n" |
| 12361 | "/// Constructs a 256-bit floating-point vector of [4 x double], with each\n" |
| 12362 | "/// of the four double-precision floating-point vector elements set to the\n" |
| 12363 | "/// specified double-precision floating-point value.\n" |
| 12364 | "///\n" |
| 12365 | "/// \\headerfile <x86intrin.h>\n" |
| 12366 | "///\n" |
| 12367 | "/// This intrinsic corresponds to the <c> VMOVDDUP+VINSERTF128 </c> instruction.\n" |
| 12368 | "///\n" |
| 12369 | "/// \\param __w\n" |
| 12370 | "/// A double-precision floating-point value used to initialize each vector\n" |
| 12371 | "/// element of the result.\n" |
| 12372 | "/// \\returns An initialized 256-bit floating-point vector of [4 x double].\n" |
| 12373 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 12374 | "_mm256_set1_pd(double __w)\n" |
| 12375 | "{\n" |
| 12376 | " return _mm256_set_pd(__w, __w, __w, __w);\n" |
| 12377 | "}\n" |
| 12378 | "\n" |
| 12379 | "/// Constructs a 256-bit floating-point vector of [8 x float], with each\n" |
| 12380 | "/// of the eight single-precision floating-point vector elements set to the\n" |
| 12381 | "/// specified single-precision floating-point value.\n" |
| 12382 | "///\n" |
| 12383 | "/// \\headerfile <x86intrin.h>\n" |
| 12384 | "///\n" |
| 12385 | "/// This intrinsic corresponds to the <c> VPERMILPS+VINSERTF128 </c>\n" |
| 12386 | "/// instruction.\n" |
| 12387 | "///\n" |
| 12388 | "/// \\param __w\n" |
| 12389 | "/// A single-precision floating-point value used to initialize each vector\n" |
| 12390 | "/// element of the result.\n" |
| 12391 | "/// \\returns An initialized 256-bit floating-point vector of [8 x float].\n" |
| 12392 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 12393 | "_mm256_set1_ps(float __w)\n" |
| 12394 | "{\n" |
| 12395 | " return _mm256_set_ps(__w, __w, __w, __w, __w, __w, __w, __w);\n" |
| 12396 | "}\n" |
| 12397 | "\n" |
| 12398 | "/// Constructs a 256-bit integer vector of [8 x i32], with each of the\n" |
| 12399 | "/// 32-bit integral vector elements set to the specified 32-bit integral\n" |
| 12400 | "/// value.\n" |
| 12401 | "///\n" |
| 12402 | "/// \\headerfile <x86intrin.h>\n" |
| 12403 | "///\n" |
| 12404 | "/// This intrinsic corresponds to the <c> VPERMILPS+VINSERTF128 </c>\n" |
| 12405 | "/// instruction.\n" |
| 12406 | "///\n" |
| 12407 | "/// \\param __i\n" |
| 12408 | "/// A 32-bit integral value used to initialize each vector element of the\n" |
| 12409 | "/// result.\n" |
| 12410 | "/// \\returns An initialized 256-bit integer vector of [8 x i32].\n" |
| 12411 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
| 12412 | "_mm256_set1_epi32(int __i)\n" |
| 12413 | "{\n" |
| 12414 | " return _mm256_set_epi32(__i, __i, __i, __i, __i, __i, __i, __i);\n" |
| 12415 | "}\n" |
| 12416 | "\n" |
| 12417 | "/// Constructs a 256-bit integer vector of [16 x i16], with each of the\n" |
| 12418 | "/// 16-bit integral vector elements set to the specified 16-bit integral\n" |
| 12419 | "/// value.\n" |
| 12420 | "///\n" |
| 12421 | "/// \\headerfile <x86intrin.h>\n" |
| 12422 | "///\n" |
| 12423 | "/// This intrinsic corresponds to the <c> VPSHUFB+VINSERTF128 </c> instruction.\n" |
| 12424 | "///\n" |
| 12425 | "/// \\param __w\n" |
| 12426 | "/// A 16-bit integral value used to initialize each vector element of the\n" |
| 12427 | "/// result.\n" |
| 12428 | "/// \\returns An initialized 256-bit integer vector of [16 x i16].\n" |
| 12429 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
| 12430 | "_mm256_set1_epi16(short __w)\n" |
| 12431 | "{\n" |
| 12432 | " return _mm256_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w,\n" |
| 12433 | " __w, __w, __w, __w, __w, __w, __w, __w);\n" |
| 12434 | "}\n" |
| 12435 | "\n" |
| 12436 | "/// Constructs a 256-bit integer vector of [32 x i8], with each of the\n" |
| 12437 | "/// 8-bit integral vector elements set to the specified 8-bit integral value.\n" |
| 12438 | "///\n" |
| 12439 | "/// \\headerfile <x86intrin.h>\n" |
| 12440 | "///\n" |
| 12441 | "/// This intrinsic corresponds to the <c> VPSHUFB+VINSERTF128 </c> instruction.\n" |
| 12442 | "///\n" |
| 12443 | "/// \\param __b\n" |
| 12444 | "/// An 8-bit integral value used to initialize each vector element of the\n" |
| 12445 | "/// result.\n" |
| 12446 | "/// \\returns An initialized 256-bit integer vector of [32 x i8].\n" |
| 12447 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
| 12448 | "_mm256_set1_epi8(char __b)\n" |
| 12449 | "{\n" |
| 12450 | " return _mm256_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b,\n" |
| 12451 | " __b, __b, __b, __b, __b, __b, __b, __b,\n" |
| 12452 | " __b, __b, __b, __b, __b, __b, __b, __b,\n" |
| 12453 | " __b, __b, __b, __b, __b, __b, __b, __b);\n" |
| 12454 | "}\n" |
| 12455 | "\n" |
| 12456 | "/// Constructs a 256-bit integer vector of [4 x i64], with each of the\n" |
| 12457 | "/// 64-bit integral vector elements set to the specified 64-bit integral\n" |
| 12458 | "/// value.\n" |
| 12459 | "///\n" |
| 12460 | "/// \\headerfile <x86intrin.h>\n" |
| 12461 | "///\n" |
| 12462 | "/// This intrinsic corresponds to the <c> VMOVDDUP+VINSERTF128 </c> instruction.\n" |
| 12463 | "///\n" |
| 12464 | "/// \\param __q\n" |
| 12465 | "/// A 64-bit integral value used to initialize each vector element of the\n" |
| 12466 | "/// result.\n" |
| 12467 | "/// \\returns An initialized 256-bit integer vector of [4 x i64].\n" |
| 12468 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
| 12469 | "_mm256_set1_epi64x(long long __q)\n" |
| 12470 | "{\n" |
| 12471 | " return _mm256_set_epi64x(__q, __q, __q, __q);\n" |
| 12472 | "}\n" |
| 12473 | "\n" |
| 12474 | "/* Create __zeroed vectors */\n" |
| 12475 | "/// Constructs a 256-bit floating-point vector of [4 x double] with all\n" |
| 12476 | "/// vector elements initialized to zero.\n" |
| 12477 | "///\n" |
| 12478 | "/// \\headerfile <x86intrin.h>\n" |
| 12479 | "///\n" |
| 12480 | "/// This intrinsic corresponds to the <c> VXORPS </c> instruction.\n" |
| 12481 | "///\n" |
| 12482 | "/// \\returns A 256-bit vector of [4 x double] with all elements set to zero.\n" |
| 12483 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 12484 | "_mm256_setzero_pd(void)\n" |
| 12485 | "{\n" |
| 12486 | " return __extension__ (__m256d){ 0, 0, 0, 0 };\n" |
| 12487 | "}\n" |
| 12488 | "\n" |
| 12489 | "/// Constructs a 256-bit floating-point vector of [8 x float] with all\n" |
| 12490 | "/// vector elements initialized to zero.\n" |
| 12491 | "///\n" |
| 12492 | "/// \\headerfile <x86intrin.h>\n" |
| 12493 | "///\n" |
| 12494 | "/// This intrinsic corresponds to the <c> VXORPS </c> instruction.\n" |
| 12495 | "///\n" |
| 12496 | "/// \\returns A 256-bit vector of [8 x float] with all elements set to zero.\n" |
| 12497 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 12498 | "_mm256_setzero_ps(void)\n" |
| 12499 | "{\n" |
| 12500 | " return __extension__ (__m256){ 0, 0, 0, 0, 0, 0, 0, 0 };\n" |
| 12501 | "}\n" |
| 12502 | "\n" |
| 12503 | "/// Constructs a 256-bit integer vector initialized to zero.\n" |
| 12504 | "///\n" |
| 12505 | "/// \\headerfile <x86intrin.h>\n" |
| 12506 | "///\n" |
| 12507 | "/// This intrinsic corresponds to the <c> VXORPS </c> instruction.\n" |
| 12508 | "///\n" |
| 12509 | "/// \\returns A 256-bit integer vector initialized to zero.\n" |
| 12510 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
| 12511 | "_mm256_setzero_si256(void)\n" |
| 12512 | "{\n" |
| 12513 | " return __extension__ (__m256i)(__v4di){ 0, 0, 0, 0 };\n" |
| 12514 | "}\n" |
| 12515 | "\n" |
| 12516 | "/* Cast between vector types */\n" |
| 12517 | "/// Casts a 256-bit floating-point vector of [4 x double] into a 256-bit\n" |
| 12518 | "/// floating-point vector of [8 x float].\n" |
| 12519 | "///\n" |
| 12520 | "/// \\headerfile <x86intrin.h>\n" |
| 12521 | "///\n" |
| 12522 | "/// This intrinsic has no corresponding instruction.\n" |
| 12523 | "///\n" |
| 12524 | "/// \\param __a\n" |
| 12525 | "/// A 256-bit floating-point vector of [4 x double].\n" |
| 12526 | "/// \\returns A 256-bit floating-point vector of [8 x float] containing the same\n" |
| 12527 | "/// bitwise pattern as the parameter.\n" |
| 12528 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 12529 | "_mm256_castpd_ps(__m256d __a)\n" |
| 12530 | "{\n" |
| 12531 | " return (__m256)__a;\n" |
| 12532 | "}\n" |
| 12533 | "\n" |
| 12534 | "/// Casts a 256-bit floating-point vector of [4 x double] into a 256-bit\n" |
| 12535 | "/// integer vector.\n" |
| 12536 | "///\n" |
| 12537 | "/// \\headerfile <x86intrin.h>\n" |
| 12538 | "///\n" |
| 12539 | "/// This intrinsic has no corresponding instruction.\n" |
| 12540 | "///\n" |
| 12541 | "/// \\param __a\n" |
| 12542 | "/// A 256-bit floating-point vector of [4 x double].\n" |
| 12543 | "/// \\returns A 256-bit integer vector containing the same bitwise pattern as the\n" |
| 12544 | "/// parameter.\n" |
| 12545 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
| 12546 | "_mm256_castpd_si256(__m256d __a)\n" |
| 12547 | "{\n" |
| 12548 | " return (__m256i)__a;\n" |
| 12549 | "}\n" |
| 12550 | "\n" |
| 12551 | "/// Casts a 256-bit floating-point vector of [8 x float] into a 256-bit\n" |
| 12552 | "/// floating-point vector of [4 x double].\n" |
| 12553 | "///\n" |
| 12554 | "/// \\headerfile <x86intrin.h>\n" |
| 12555 | "///\n" |
| 12556 | "/// This intrinsic has no corresponding instruction.\n" |
| 12557 | "///\n" |
| 12558 | "/// \\param __a\n" |
| 12559 | "/// A 256-bit floating-point vector of [8 x float].\n" |
| 12560 | "/// \\returns A 256-bit floating-point vector of [4 x double] containing the same\n" |
| 12561 | "/// bitwise pattern as the parameter.\n" |
| 12562 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 12563 | "_mm256_castps_pd(__m256 __a)\n" |
| 12564 | "{\n" |
| 12565 | " return (__m256d)__a;\n" |
| 12566 | "}\n" |
| 12567 | "\n" |
| 12568 | "/// Casts a 256-bit floating-point vector of [8 x float] into a 256-bit\n" |
| 12569 | "/// integer vector.\n" |
| 12570 | "///\n" |
| 12571 | "/// \\headerfile <x86intrin.h>\n" |
| 12572 | "///\n" |
| 12573 | "/// This intrinsic has no corresponding instruction.\n" |
| 12574 | "///\n" |
| 12575 | "/// \\param __a\n" |
| 12576 | "/// A 256-bit floating-point vector of [8 x float].\n" |
| 12577 | "/// \\returns A 256-bit integer vector containing the same bitwise pattern as the\n" |
| 12578 | "/// parameter.\n" |
| 12579 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
| 12580 | "_mm256_castps_si256(__m256 __a)\n" |
| 12581 | "{\n" |
| 12582 | " return (__m256i)__a;\n" |
| 12583 | "}\n" |
| 12584 | "\n" |
| 12585 | "/// Casts a 256-bit integer vector into a 256-bit floating-point vector\n" |
| 12586 | "/// of [8 x float].\n" |
| 12587 | "///\n" |
| 12588 | "/// \\headerfile <x86intrin.h>\n" |
| 12589 | "///\n" |
| 12590 | "/// This intrinsic has no corresponding instruction.\n" |
| 12591 | "///\n" |
| 12592 | "/// \\param __a\n" |
| 12593 | "/// A 256-bit integer vector.\n" |
| 12594 | "/// \\returns A 256-bit floating-point vector of [8 x float] containing the same\n" |
| 12595 | "/// bitwise pattern as the parameter.\n" |
| 12596 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 12597 | "_mm256_castsi256_ps(__m256i __a)\n" |
| 12598 | "{\n" |
| 12599 | " return (__m256)__a;\n" |
| 12600 | "}\n" |
| 12601 | "\n" |
| 12602 | "/// Casts a 256-bit integer vector into a 256-bit floating-point vector\n" |
| 12603 | "/// of [4 x double].\n" |
| 12604 | "///\n" |
| 12605 | "/// \\headerfile <x86intrin.h>\n" |
| 12606 | "///\n" |
| 12607 | "/// This intrinsic has no corresponding instruction.\n" |
| 12608 | "///\n" |
| 12609 | "/// \\param __a\n" |
| 12610 | "/// A 256-bit integer vector.\n" |
| 12611 | "/// \\returns A 256-bit floating-point vector of [4 x double] containing the same\n" |
| 12612 | "/// bitwise pattern as the parameter.\n" |
| 12613 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 12614 | "_mm256_castsi256_pd(__m256i __a)\n" |
| 12615 | "{\n" |
| 12616 | " return (__m256d)__a;\n" |
| 12617 | "}\n" |
| 12618 | "\n" |
| 12619 | "/// Returns the lower 128 bits of a 256-bit floating-point vector of\n" |
| 12620 | "/// [4 x double] as a 128-bit floating-point vector of [2 x double].\n" |
| 12621 | "///\n" |
| 12622 | "/// \\headerfile <x86intrin.h>\n" |
| 12623 | "///\n" |
| 12624 | "/// This intrinsic has no corresponding instruction.\n" |
| 12625 | "///\n" |
| 12626 | "/// \\param __a\n" |
| 12627 | "/// A 256-bit floating-point vector of [4 x double].\n" |
| 12628 | "/// \\returns A 128-bit floating-point vector of [2 x double] containing the\n" |
| 12629 | "/// lower 128 bits of the parameter.\n" |
| 12630 | "static __inline __m128d __DEFAULT_FN_ATTRS\n" |
| 12631 | "_mm256_castpd256_pd128(__m256d __a)\n" |
| 12632 | "{\n" |
| 12633 | " return __builtin_shufflevector((__v4df)__a, (__v4df)__a, 0, 1);\n" |
| 12634 | "}\n" |
| 12635 | "\n" |
| 12636 | "/// Returns the lower 128 bits of a 256-bit floating-point vector of\n" |
| 12637 | "/// [8 x float] as a 128-bit floating-point vector of [4 x float].\n" |
| 12638 | "///\n" |
| 12639 | "/// \\headerfile <x86intrin.h>\n" |
| 12640 | "///\n" |
| 12641 | "/// This intrinsic has no corresponding instruction.\n" |
| 12642 | "///\n" |
| 12643 | "/// \\param __a\n" |
| 12644 | "/// A 256-bit floating-point vector of [8 x float].\n" |
| 12645 | "/// \\returns A 128-bit floating-point vector of [4 x float] containing the\n" |
| 12646 | "/// lower 128 bits of the parameter.\n" |
| 12647 | "static __inline __m128 __DEFAULT_FN_ATTRS\n" |
| 12648 | "_mm256_castps256_ps128(__m256 __a)\n" |
| 12649 | "{\n" |
| 12650 | " return __builtin_shufflevector((__v8sf)__a, (__v8sf)__a, 0, 1, 2, 3);\n" |
| 12651 | "}\n" |
| 12652 | "\n" |
| 12653 | "/// Truncates a 256-bit integer vector into a 128-bit integer vector.\n" |
| 12654 | "///\n" |
| 12655 | "/// \\headerfile <x86intrin.h>\n" |
| 12656 | "///\n" |
| 12657 | "/// This intrinsic has no corresponding instruction.\n" |
| 12658 | "///\n" |
| 12659 | "/// \\param __a\n" |
| 12660 | "/// A 256-bit integer vector.\n" |
| 12661 | "/// \\returns A 128-bit integer vector containing the lower 128 bits of the\n" |
| 12662 | "/// parameter.\n" |
| 12663 | "static __inline __m128i __DEFAULT_FN_ATTRS\n" |
| 12664 | "_mm256_castsi256_si128(__m256i __a)\n" |
| 12665 | "{\n" |
| 12666 | " return __builtin_shufflevector((__v4di)__a, (__v4di)__a, 0, 1);\n" |
| 12667 | "}\n" |
| 12668 | "\n" |
| 12669 | "/// Constructs a 256-bit floating-point vector of [4 x double] from a\n" |
| 12670 | "/// 128-bit floating-point vector of [2 x double].\n" |
| 12671 | "///\n" |
| 12672 | "/// The lower 128 bits contain the value of the source vector. The contents\n" |
| 12673 | "/// of the upper 128 bits are undefined.\n" |
| 12674 | "///\n" |
| 12675 | "/// \\headerfile <x86intrin.h>\n" |
| 12676 | "///\n" |
| 12677 | "/// This intrinsic has no corresponding instruction.\n" |
| 12678 | "///\n" |
| 12679 | "/// \\param __a\n" |
| 12680 | "/// A 128-bit vector of [2 x double].\n" |
| 12681 | "/// \\returns A 256-bit floating-point vector of [4 x double]. The lower 128 bits\n" |
| 12682 | "/// contain the value of the parameter. The contents of the upper 128 bits\n" |
| 12683 | "/// are undefined.\n" |
| 12684 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 12685 | "_mm256_castpd128_pd256(__m128d __a)\n" |
| 12686 | "{\n" |
| 12687 | " return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 1, -1, -1);\n" |
| 12688 | "}\n" |
| 12689 | "\n" |
| 12690 | "/// Constructs a 256-bit floating-point vector of [8 x float] from a\n" |
| 12691 | "/// 128-bit floating-point vector of [4 x float].\n" |
| 12692 | "///\n" |
| 12693 | "/// The lower 128 bits contain the value of the source vector. The contents\n" |
| 12694 | "/// of the upper 128 bits are undefined.\n" |
| 12695 | "///\n" |
| 12696 | "/// \\headerfile <x86intrin.h>\n" |
| 12697 | "///\n" |
| 12698 | "/// This intrinsic has no corresponding instruction.\n" |
| 12699 | "///\n" |
| 12700 | "/// \\param __a\n" |
| 12701 | "/// A 128-bit vector of [4 x float].\n" |
| 12702 | "/// \\returns A 256-bit floating-point vector of [8 x float]. The lower 128 bits\n" |
| 12703 | "/// contain the value of the parameter. The contents of the upper 128 bits\n" |
| 12704 | "/// are undefined.\n" |
| 12705 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 12706 | "_mm256_castps128_ps256(__m128 __a)\n" |
| 12707 | "{\n" |
| 12708 | " return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1, 2, 3, -1, -1, -1, -1);\n" |
| 12709 | "}\n" |
| 12710 | "\n" |
| 12711 | "/// Constructs a 256-bit integer vector from a 128-bit integer vector.\n" |
| 12712 | "///\n" |
| 12713 | "/// The lower 128 bits contain the value of the source vector. The contents\n" |
| 12714 | "/// of the upper 128 bits are undefined.\n" |
| 12715 | "///\n" |
| 12716 | "/// \\headerfile <x86intrin.h>\n" |
| 12717 | "///\n" |
| 12718 | "/// This intrinsic has no corresponding instruction.\n" |
| 12719 | "///\n" |
| 12720 | "/// \\param __a\n" |
| 12721 | "/// A 128-bit integer vector.\n" |
| 12722 | "/// \\returns A 256-bit integer vector. The lower 128 bits contain the value of\n" |
| 12723 | "/// the parameter. The contents of the upper 128 bits are undefined.\n" |
| 12724 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
| 12725 | "_mm256_castsi128_si256(__m128i __a)\n" |
| 12726 | "{\n" |
| 12727 | " return __builtin_shufflevector((__v2di)__a, (__v2di)__a, 0, 1, -1, -1);\n" |
| 12728 | "}\n" |
| 12729 | "\n" |
| 12730 | "/// Constructs a 256-bit floating-point vector of [4 x double] from a\n" |
| 12731 | "/// 128-bit floating-point vector of [2 x double]. The lower 128 bits\n" |
| 12732 | "/// contain the value of the source vector. The upper 128 bits are set\n" |
| 12733 | "/// to zero.\n" |
| 12734 | "///\n" |
| 12735 | "/// \\headerfile <x86intrin.h>\n" |
| 12736 | "///\n" |
| 12737 | "/// This intrinsic has no corresponding instruction.\n" |
| 12738 | "///\n" |
| 12739 | "/// \\param __a\n" |
| 12740 | "/// A 128-bit vector of [2 x double].\n" |
| 12741 | "/// \\returns A 256-bit floating-point vector of [4 x double]. The lower 128 bits\n" |
| 12742 | "/// contain the value of the parameter. The upper 128 bits are set to zero.\n" |
| 12743 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 12744 | "_mm256_zextpd128_pd256(__m128d __a)\n" |
| 12745 | "{\n" |
| 12746 | " return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3);\n" |
| 12747 | "}\n" |
| 12748 | "\n" |
| 12749 | "/// Constructs a 256-bit floating-point vector of [8 x float] from a\n" |
| 12750 | "/// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain\n" |
| 12751 | "/// the value of the source vector. The upper 128 bits are set to zero.\n" |
| 12752 | "///\n" |
| 12753 | "/// \\headerfile <x86intrin.h>\n" |
| 12754 | "///\n" |
| 12755 | "/// This intrinsic has no corresponding instruction.\n" |
| 12756 | "///\n" |
| 12757 | "/// \\param __a\n" |
| 12758 | "/// A 128-bit vector of [4 x float].\n" |
| 12759 | "/// \\returns A 256-bit floating-point vector of [8 x float]. The lower 128 bits\n" |
| 12760 | "/// contain the value of the parameter. The upper 128 bits are set to zero.\n" |
| 12761 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 12762 | "_mm256_zextps128_ps256(__m128 __a)\n" |
| 12763 | "{\n" |
| 12764 | " return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7);\n" |
| 12765 | "}\n" |
| 12766 | "\n" |
| 12767 | "/// Constructs a 256-bit integer vector from a 128-bit integer vector.\n" |
| 12768 | "/// The lower 128 bits contain the value of the source vector. The upper\n" |
| 12769 | "/// 128 bits are set to zero.\n" |
| 12770 | "///\n" |
| 12771 | "/// \\headerfile <x86intrin.h>\n" |
| 12772 | "///\n" |
| 12773 | "/// This intrinsic has no corresponding instruction.\n" |
| 12774 | "///\n" |
| 12775 | "/// \\param __a\n" |
| 12776 | "/// A 128-bit integer vector.\n" |
| 12777 | "/// \\returns A 256-bit integer vector. The lower 128 bits contain the value of\n" |
| 12778 | "/// the parameter. The upper 128 bits are set to zero.\n" |
| 12779 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
| 12780 | "_mm256_zextsi128_si256(__m128i __a)\n" |
| 12781 | "{\n" |
| 12782 | " return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3);\n" |
| 12783 | "}\n" |
| 12784 | "\n" |
| 12785 | "/*\n" |
| 12786 | " Vector insert.\n" |
| 12787 | " We use macros rather than inlines because we only want to accept\n" |
| 12788 | " invocations where the immediate M is a constant expression.\n" |
| 12789 | "*/\n" |
| 12790 | "/// Constructs a new 256-bit vector of [8 x float] by first duplicating\n" |
| 12791 | "/// a 256-bit vector of [8 x float] given in the first parameter, and then\n" |
| 12792 | "/// replacing either the upper or the lower 128 bits with the contents of a\n" |
| 12793 | "/// 128-bit vector of [4 x float] in the second parameter.\n" |
| 12794 | "///\n" |
| 12795 | "/// The immediate integer parameter determines between the upper or the lower\n" |
| 12796 | "/// 128 bits.\n" |
| 12797 | "///\n" |
| 12798 | "/// \\headerfile <x86intrin.h>\n" |
| 12799 | "///\n" |
| 12800 | "/// \\code\n" |
| 12801 | "/// __m256 _mm256_insertf128_ps(__m256 V1, __m128 V2, const int M);\n" |
| 12802 | "/// \\endcode\n" |
| 12803 | "///\n" |
| 12804 | "/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n" |
| 12805 | "///\n" |
| 12806 | "/// \\param V1\n" |
| 12807 | "/// A 256-bit vector of [8 x float]. This vector is copied to the result\n" |
| 12808 | "/// first, and then either the upper or the lower 128 bits of the result will\n" |
| 12809 | "/// be replaced by the contents of \\a V2.\n" |
| 12810 | "/// \\param V2\n" |
| 12811 | "/// A 128-bit vector of [4 x float]. The contents of this parameter are\n" |
| 12812 | "/// written to either the upper or the lower 128 bits of the result depending\n" |
| 12813 | "/// on the value of parameter \\a M.\n" |
| 12814 | "/// \\param M\n" |
| 12815 | "/// An immediate integer. The least significant bit determines how the values\n" |
| 12816 | "/// from the two parameters are interleaved: \\n\n" |
| 12817 | "/// If bit [0] of \\a M is 0, \\a V2 are copied to bits [127:0] of the result,\n" |
| 12818 | "/// and bits [255:128] of \\a V1 are copied to bits [255:128] of the\n" |
| 12819 | "/// result. \\n\n" |
| 12820 | "/// If bit [0] of \\a M is 1, \\a V2 are copied to bits [255:128] of the\n" |
| 12821 | "/// result, and bits [127:0] of \\a V1 are copied to bits [127:0] of the\n" |
| 12822 | "/// result.\n" |
| 12823 | "/// \\returns A 256-bit vector of [8 x float] containing the interleaved values.\n" |
| 12824 | "#define _mm256_insertf128_ps(V1, V2, M) \\\n" |
| 12825 | " (__m256)__builtin_ia32_vinsertf128_ps256((__v8sf)(__m256)(V1), \\\n" |
| 12826 | " (__v4sf)(__m128)(V2), (int)(M))\n" |
| 12827 | "\n" |
| 12828 | "/// Constructs a new 256-bit vector of [4 x double] by first duplicating\n" |
| 12829 | "/// a 256-bit vector of [4 x double] given in the first parameter, and then\n" |
| 12830 | "/// replacing either the upper or the lower 128 bits with the contents of a\n" |
| 12831 | "/// 128-bit vector of [2 x double] in the second parameter.\n" |
| 12832 | "///\n" |
| 12833 | "/// The immediate integer parameter determines between the upper or the lower\n" |
| 12834 | "/// 128 bits.\n" |
| 12835 | "///\n" |
| 12836 | "/// \\headerfile <x86intrin.h>\n" |
| 12837 | "///\n" |
| 12838 | "/// \\code\n" |
| 12839 | "/// __m256d _mm256_insertf128_pd(__m256d V1, __m128d V2, const int M);\n" |
| 12840 | "/// \\endcode\n" |
| 12841 | "///\n" |
| 12842 | "/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n" |
| 12843 | "///\n" |
| 12844 | "/// \\param V1\n" |
| 12845 | "/// A 256-bit vector of [4 x double]. This vector is copied to the result\n" |
| 12846 | "/// first, and then either the upper or the lower 128 bits of the result will\n" |
| 12847 | "/// be replaced by the contents of \\a V2.\n" |
| 12848 | "/// \\param V2\n" |
| 12849 | "/// A 128-bit vector of [2 x double]. The contents of this parameter are\n" |
| 12850 | "/// written to either the upper or the lower 128 bits of the result depending\n" |
| 12851 | "/// on the value of parameter \\a M.\n" |
| 12852 | "/// \\param M\n" |
| 12853 | "/// An immediate integer. The least significant bit determines how the values\n" |
| 12854 | "/// from the two parameters are interleaved: \\n\n" |
| 12855 | "/// If bit [0] of \\a M is 0, \\a V2 are copied to bits [127:0] of the result,\n" |
| 12856 | "/// and bits [255:128] of \\a V1 are copied to bits [255:128] of the\n" |
| 12857 | "/// result. \\n\n" |
| 12858 | "/// If bit [0] of \\a M is 1, \\a V2 are copied to bits [255:128] of the\n" |
| 12859 | "/// result, and bits [127:0] of \\a V1 are copied to bits [127:0] of the\n" |
| 12860 | "/// result.\n" |
| 12861 | "/// \\returns A 256-bit vector of [4 x double] containing the interleaved values.\n" |
| 12862 | "#define _mm256_insertf128_pd(V1, V2, M) \\\n" |
| 12863 | " (__m256d)__builtin_ia32_vinsertf128_pd256((__v4df)(__m256d)(V1), \\\n" |
| 12864 | " (__v2df)(__m128d)(V2), (int)(M))\n" |
| 12865 | "\n" |
| 12866 | "/// Constructs a new 256-bit integer vector by first duplicating a\n" |
| 12867 | "/// 256-bit integer vector given in the first parameter, and then replacing\n" |
| 12868 | "/// either the upper or the lower 128 bits with the contents of a 128-bit\n" |
| 12869 | "/// integer vector in the second parameter.\n" |
| 12870 | "///\n" |
| 12871 | "/// The immediate integer parameter determines between the upper or the lower\n" |
| 12872 | "/// 128 bits.\n" |
| 12873 | "///\n" |
| 12874 | "/// \\headerfile <x86intrin.h>\n" |
| 12875 | "///\n" |
| 12876 | "/// \\code\n" |
| 12877 | "/// __m256i _mm256_insertf128_si256(__m256i V1, __m128i V2, const int M);\n" |
| 12878 | "/// \\endcode\n" |
| 12879 | "///\n" |
| 12880 | "/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n" |
| 12881 | "///\n" |
| 12882 | "/// \\param V1\n" |
| 12883 | "/// A 256-bit integer vector. This vector is copied to the result first, and\n" |
| 12884 | "/// then either the upper or the lower 128 bits of the result will be\n" |
| 12885 | "/// replaced by the contents of \\a V2.\n" |
| 12886 | "/// \\param V2\n" |
| 12887 | "/// A 128-bit integer vector. The contents of this parameter are written to\n" |
| 12888 | "/// either the upper or the lower 128 bits of the result depending on the\n" |
| 12889 | "/// value of parameter \\a M.\n" |
| 12890 | "/// \\param M\n" |
| 12891 | "/// An immediate integer. The least significant bit determines how the values\n" |
| 12892 | "/// from the two parameters are interleaved: \\n\n" |
| 12893 | "/// If bit [0] of \\a M is 0, \\a V2 are copied to bits [127:0] of the result,\n" |
| 12894 | "/// and bits [255:128] of \\a V1 are copied to bits [255:128] of the\n" |
| 12895 | "/// result. \\n\n" |
| 12896 | "/// If bit [0] of \\a M is 1, \\a V2 are copied to bits [255:128] of the\n" |
| 12897 | "/// result, and bits [127:0] of \\a V1 are copied to bits [127:0] of the\n" |
| 12898 | "/// result.\n" |
| 12899 | "/// \\returns A 256-bit integer vector containing the interleaved values.\n" |
| 12900 | "#define _mm256_insertf128_si256(V1, V2, M) \\\n" |
| 12901 | " (__m256i)__builtin_ia32_vinsertf128_si256((__v8si)(__m256i)(V1), \\\n" |
| 12902 | " (__v4si)(__m128i)(V2), (int)(M))\n" |
| 12903 | "\n" |
| 12904 | "/*\n" |
| 12905 | " Vector extract.\n" |
| 12906 | " We use macros rather than inlines because we only want to accept\n" |
| 12907 | " invocations where the immediate M is a constant expression.\n" |
| 12908 | "*/\n" |
| 12909 | "/// Extracts either the upper or the lower 128 bits from a 256-bit vector\n" |
| 12910 | "/// of [8 x float], as determined by the immediate integer parameter, and\n" |
| 12911 | "/// returns the extracted bits as a 128-bit vector of [4 x float].\n" |
| 12912 | "///\n" |
| 12913 | "/// \\headerfile <x86intrin.h>\n" |
| 12914 | "///\n" |
| 12915 | "/// \\code\n" |
| 12916 | "/// __m128 _mm256_extractf128_ps(__m256 V, const int M);\n" |
| 12917 | "/// \\endcode\n" |
| 12918 | "///\n" |
| 12919 | "/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction.\n" |
| 12920 | "///\n" |
| 12921 | "/// \\param V\n" |
| 12922 | "/// A 256-bit vector of [8 x float].\n" |
| 12923 | "/// \\param M\n" |
| 12924 | "/// An immediate integer. The least significant bit determines which bits are\n" |
| 12925 | "/// extracted from the first parameter: \\n\n" |
| 12926 | "/// If bit [0] of \\a M is 0, bits [127:0] of \\a V are copied to the\n" |
| 12927 | "/// result. \\n\n" |
| 12928 | "/// If bit [0] of \\a M is 1, bits [255:128] of \\a V are copied to the result.\n" |
| 12929 | "/// \\returns A 128-bit vector of [4 x float] containing the extracted bits.\n" |
| 12930 | "#define _mm256_extractf128_ps(V, M) \\\n" |
| 12931 | " (__m128)__builtin_ia32_vextractf128_ps256((__v8sf)(__m256)(V), (int)(M))\n" |
| 12932 | "\n" |
| 12933 | "/// Extracts either the upper or the lower 128 bits from a 256-bit vector\n" |
| 12934 | "/// of [4 x double], as determined by the immediate integer parameter, and\n" |
| 12935 | "/// returns the extracted bits as a 128-bit vector of [2 x double].\n" |
| 12936 | "///\n" |
| 12937 | "/// \\headerfile <x86intrin.h>\n" |
| 12938 | "///\n" |
| 12939 | "/// \\code\n" |
| 12940 | "/// __m128d _mm256_extractf128_pd(__m256d V, const int M);\n" |
| 12941 | "/// \\endcode\n" |
| 12942 | "///\n" |
| 12943 | "/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction.\n" |
| 12944 | "///\n" |
| 12945 | "/// \\param V\n" |
| 12946 | "/// A 256-bit vector of [4 x double].\n" |
| 12947 | "/// \\param M\n" |
| 12948 | "/// An immediate integer. The least significant bit determines which bits are\n" |
| 12949 | "/// extracted from the first parameter: \\n\n" |
| 12950 | "/// If bit [0] of \\a M is 0, bits [127:0] of \\a V are copied to the\n" |
| 12951 | "/// result. \\n\n" |
| 12952 | "/// If bit [0] of \\a M is 1, bits [255:128] of \\a V are copied to the result.\n" |
| 12953 | "/// \\returns A 128-bit vector of [2 x double] containing the extracted bits.\n" |
| 12954 | "#define _mm256_extractf128_pd(V, M) \\\n" |
| 12955 | " (__m128d)__builtin_ia32_vextractf128_pd256((__v4df)(__m256d)(V), (int)(M))\n" |
| 12956 | "\n" |
| 12957 | "/// Extracts either the upper or the lower 128 bits from a 256-bit\n" |
| 12958 | "/// integer vector, as determined by the immediate integer parameter, and\n" |
| 12959 | "/// returns the extracted bits as a 128-bit integer vector.\n" |
| 12960 | "///\n" |
| 12961 | "/// \\headerfile <x86intrin.h>\n" |
| 12962 | "///\n" |
| 12963 | "/// \\code\n" |
| 12964 | "/// __m128i _mm256_extractf128_si256(__m256i V, const int M);\n" |
| 12965 | "/// \\endcode\n" |
| 12966 | "///\n" |
| 12967 | "/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction.\n" |
| 12968 | "///\n" |
| 12969 | "/// \\param V\n" |
| 12970 | "/// A 256-bit integer vector.\n" |
| 12971 | "/// \\param M\n" |
| 12972 | "/// An immediate integer. The least significant bit determines which bits are\n" |
| 12973 | "/// extracted from the first parameter: \\n\n" |
| 12974 | "/// If bit [0] of \\a M is 0, bits [127:0] of \\a V are copied to the\n" |
| 12975 | "/// result. \\n\n" |
| 12976 | "/// If bit [0] of \\a M is 1, bits [255:128] of \\a V are copied to the result.\n" |
| 12977 | "/// \\returns A 128-bit integer vector containing the extracted bits.\n" |
| 12978 | "#define _mm256_extractf128_si256(V, M) \\\n" |
| 12979 | " (__m128i)__builtin_ia32_vextractf128_si256((__v8si)(__m256i)(V), (int)(M))\n" |
| 12980 | "\n" |
| 12981 | "/* SIMD load ops (unaligned) */\n" |
| 12982 | "/// Loads two 128-bit floating-point vectors of [4 x float] from\n" |
| 12983 | "/// unaligned memory locations and constructs a 256-bit floating-point vector\n" |
| 12984 | "/// of [8 x float] by concatenating the two 128-bit vectors.\n" |
| 12985 | "///\n" |
| 12986 | "/// \\headerfile <x86intrin.h>\n" |
| 12987 | "///\n" |
| 12988 | "/// This intrinsic corresponds to load instructions followed by the\n" |
| 12989 | "/// <c> VINSERTF128 </c> instruction.\n" |
| 12990 | "///\n" |
| 12991 | "/// \\param __addr_hi\n" |
| 12992 | "/// A pointer to a 128-bit memory location containing 4 consecutive\n" |
| 12993 | "/// single-precision floating-point values. These values are to be copied to\n" |
| 12994 | "/// bits[255:128] of the result. The address of the memory location does not\n" |
| 12995 | "/// have to be aligned.\n" |
| 12996 | "/// \\param __addr_lo\n" |
| 12997 | "/// A pointer to a 128-bit memory location containing 4 consecutive\n" |
| 12998 | "/// single-precision floating-point values. These values are to be copied to\n" |
| 12999 | "/// bits[127:0] of the result. The address of the memory location does not\n" |
| 13000 | "/// have to be aligned.\n" |
| 13001 | "/// \\returns A 256-bit floating-point vector of [8 x float] containing the\n" |
| 13002 | "/// concatenated result.\n" |
| 13003 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 13004 | "_mm256_loadu2_m128(float const *__addr_hi, float const *__addr_lo)\n" |
| 13005 | "{\n" |
| 13006 | " __m256 __v256 = _mm256_castps128_ps256(_mm_loadu_ps(__addr_lo));\n" |
| 13007 | " return _mm256_insertf128_ps(__v256, _mm_loadu_ps(__addr_hi), 1);\n" |
| 13008 | "}\n" |
| 13009 | "\n" |
| 13010 | "/// Loads two 128-bit floating-point vectors of [2 x double] from\n" |
| 13011 | "/// unaligned memory locations and constructs a 256-bit floating-point vector\n" |
| 13012 | "/// of [4 x double] by concatenating the two 128-bit vectors.\n" |
| 13013 | "///\n" |
| 13014 | "/// \\headerfile <x86intrin.h>\n" |
| 13015 | "///\n" |
| 13016 | "/// This intrinsic corresponds to load instructions followed by the\n" |
| 13017 | "/// <c> VINSERTF128 </c> instruction.\n" |
| 13018 | "///\n" |
| 13019 | "/// \\param __addr_hi\n" |
| 13020 | "/// A pointer to a 128-bit memory location containing two consecutive\n" |
| 13021 | "/// double-precision floating-point values. These values are to be copied to\n" |
| 13022 | "/// bits[255:128] of the result. The address of the memory location does not\n" |
| 13023 | "/// have to be aligned.\n" |
| 13024 | "/// \\param __addr_lo\n" |
| 13025 | "/// A pointer to a 128-bit memory location containing two consecutive\n" |
| 13026 | "/// double-precision floating-point values. These values are to be copied to\n" |
| 13027 | "/// bits[127:0] of the result. The address of the memory location does not\n" |
| 13028 | "/// have to be aligned.\n" |
| 13029 | "/// \\returns A 256-bit floating-point vector of [4 x double] containing the\n" |
| 13030 | "/// concatenated result.\n" |
| 13031 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 13032 | "_mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo)\n" |
| 13033 | "{\n" |
| 13034 | " __m256d __v256 = _mm256_castpd128_pd256(_mm_loadu_pd(__addr_lo));\n" |
| 13035 | " return _mm256_insertf128_pd(__v256, _mm_loadu_pd(__addr_hi), 1);\n" |
| 13036 | "}\n" |
| 13037 | "\n" |
| 13038 | "/// Loads two 128-bit integer vectors from unaligned memory locations and\n" |
| 13039 | "/// constructs a 256-bit integer vector by concatenating the two 128-bit\n" |
| 13040 | "/// vectors.\n" |
| 13041 | "///\n" |
| 13042 | "/// \\headerfile <x86intrin.h>\n" |
| 13043 | "///\n" |
| 13044 | "/// This intrinsic corresponds to load instructions followed by the\n" |
| 13045 | "/// <c> VINSERTF128 </c> instruction.\n" |
| 13046 | "///\n" |
| 13047 | "/// \\param __addr_hi\n" |
| 13048 | "/// A pointer to a 128-bit memory location containing a 128-bit integer\n" |
| 13049 | "/// vector. This vector is to be copied to bits[255:128] of the result. The\n" |
| 13050 | "/// address of the memory location does not have to be aligned.\n" |
| 13051 | "/// \\param __addr_lo\n" |
| 13052 | "/// A pointer to a 128-bit memory location containing a 128-bit integer\n" |
| 13053 | "/// vector. This vector is to be copied to bits[127:0] of the result. The\n" |
| 13054 | "/// address of the memory location does not have to be aligned.\n" |
| 13055 | "/// \\returns A 256-bit integer vector containing the concatenated result.\n" |
| 13056 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
| 13057 | "_mm256_loadu2_m128i(__m128i const *__addr_hi, __m128i const *__addr_lo)\n" |
| 13058 | "{\n" |
| 13059 | " __m256i __v256 = _mm256_castsi128_si256(_mm_loadu_si128(__addr_lo));\n" |
| 13060 | " return _mm256_insertf128_si256(__v256, _mm_loadu_si128(__addr_hi), 1);\n" |
| 13061 | "}\n" |
| 13062 | "\n" |
| 13063 | "/* SIMD store ops (unaligned) */\n" |
| 13064 | "/// Stores the upper and lower 128 bits of a 256-bit floating-point\n" |
| 13065 | "/// vector of [8 x float] into two different unaligned memory locations.\n" |
| 13066 | "///\n" |
| 13067 | "/// \\headerfile <x86intrin.h>\n" |
| 13068 | "///\n" |
| 13069 | "/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction and the\n" |
| 13070 | "/// store instructions.\n" |
| 13071 | "///\n" |
| 13072 | "/// \\param __addr_hi\n" |
| 13073 | "/// A pointer to a 128-bit memory location. Bits[255:128] of \\a __a are to be\n" |
| 13074 | "/// copied to this memory location. The address of this memory location does\n" |
| 13075 | "/// not have to be aligned.\n" |
| 13076 | "/// \\param __addr_lo\n" |
| 13077 | "/// A pointer to a 128-bit memory location. Bits[127:0] of \\a __a are to be\n" |
| 13078 | "/// copied to this memory location. The address of this memory location does\n" |
| 13079 | "/// not have to be aligned.\n" |
| 13080 | "/// \\param __a\n" |
| 13081 | "/// A 256-bit floating-point vector of [8 x float].\n" |
| 13082 | "static __inline void __DEFAULT_FN_ATTRS\n" |
| 13083 | "_mm256_storeu2_m128(float *__addr_hi, float *__addr_lo, __m256 __a)\n" |
| 13084 | "{\n" |
| 13085 | " __m128 __v128;\n" |
| 13086 | "\n" |
| 13087 | " __v128 = _mm256_castps256_ps128(__a);\n" |
| 13088 | " _mm_storeu_ps(__addr_lo, __v128);\n" |
| 13089 | " __v128 = _mm256_extractf128_ps(__a, 1);\n" |
| 13090 | " _mm_storeu_ps(__addr_hi, __v128);\n" |
| 13091 | "}\n" |
| 13092 | "\n" |
| 13093 | "/// Stores the upper and lower 128 bits of a 256-bit floating-point\n" |
| 13094 | "/// vector of [4 x double] into two different unaligned memory locations.\n" |
| 13095 | "///\n" |
| 13096 | "/// \\headerfile <x86intrin.h>\n" |
| 13097 | "///\n" |
| 13098 | "/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction and the\n" |
| 13099 | "/// store instructions.\n" |
| 13100 | "///\n" |
| 13101 | "/// \\param __addr_hi\n" |
| 13102 | "/// A pointer to a 128-bit memory location. Bits[255:128] of \\a __a are to be\n" |
| 13103 | "/// copied to this memory location. The address of this memory location does\n" |
| 13104 | "/// not have to be aligned.\n" |
| 13105 | "/// \\param __addr_lo\n" |
| 13106 | "/// A pointer to a 128-bit memory location. Bits[127:0] of \\a __a are to be\n" |
| 13107 | "/// copied to this memory location. The address of this memory location does\n" |
| 13108 | "/// not have to be aligned.\n" |
| 13109 | "/// \\param __a\n" |
| 13110 | "/// A 256-bit floating-point vector of [4 x double].\n" |
| 13111 | "static __inline void __DEFAULT_FN_ATTRS\n" |
| 13112 | "_mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a)\n" |
| 13113 | "{\n" |
| 13114 | " __m128d __v128;\n" |
| 13115 | "\n" |
| 13116 | " __v128 = _mm256_castpd256_pd128(__a);\n" |
| 13117 | " _mm_storeu_pd(__addr_lo, __v128);\n" |
| 13118 | " __v128 = _mm256_extractf128_pd(__a, 1);\n" |
| 13119 | " _mm_storeu_pd(__addr_hi, __v128);\n" |
| 13120 | "}\n" |
| 13121 | "\n" |
| 13122 | "/// Stores the upper and lower 128 bits of a 256-bit integer vector into\n" |
| 13123 | "/// two different unaligned memory locations.\n" |
| 13124 | "///\n" |
| 13125 | "/// \\headerfile <x86intrin.h>\n" |
| 13126 | "///\n" |
| 13127 | "/// This intrinsic corresponds to the <c> VEXTRACTF128 </c> instruction and the\n" |
| 13128 | "/// store instructions.\n" |
| 13129 | "///\n" |
| 13130 | "/// \\param __addr_hi\n" |
| 13131 | "/// A pointer to a 128-bit memory location. Bits[255:128] of \\a __a are to be\n" |
| 13132 | "/// copied to this memory location. The address of this memory location does\n" |
| 13133 | "/// not have to be aligned.\n" |
| 13134 | "/// \\param __addr_lo\n" |
| 13135 | "/// A pointer to a 128-bit memory location. Bits[127:0] of \\a __a are to be\n" |
| 13136 | "/// copied to this memory location. The address of this memory location does\n" |
| 13137 | "/// not have to be aligned.\n" |
| 13138 | "/// \\param __a\n" |
| 13139 | "/// A 256-bit integer vector.\n" |
| 13140 | "static __inline void __DEFAULT_FN_ATTRS\n" |
| 13141 | "_mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a)\n" |
| 13142 | "{\n" |
| 13143 | " __m128i __v128;\n" |
| 13144 | "\n" |
| 13145 | " __v128 = _mm256_castsi256_si128(__a);\n" |
| 13146 | " _mm_storeu_si128(__addr_lo, __v128);\n" |
| 13147 | " __v128 = _mm256_extractf128_si256(__a, 1);\n" |
| 13148 | " _mm_storeu_si128(__addr_hi, __v128);\n" |
| 13149 | "}\n" |
| 13150 | "\n" |
| 13151 | "/// Constructs a 256-bit floating-point vector of [8 x float] by\n" |
| 13152 | "/// concatenating two 128-bit floating-point vectors of [4 x float].\n" |
| 13153 | "///\n" |
| 13154 | "/// \\headerfile <x86intrin.h>\n" |
| 13155 | "///\n" |
| 13156 | "/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n" |
| 13157 | "///\n" |
| 13158 | "/// \\param __hi\n" |
| 13159 | "/// A 128-bit floating-point vector of [4 x float] to be copied to the upper\n" |
| 13160 | "/// 128 bits of the result.\n" |
| 13161 | "/// \\param __lo\n" |
| 13162 | "/// A 128-bit floating-point vector of [4 x float] to be copied to the lower\n" |
| 13163 | "/// 128 bits of the result.\n" |
| 13164 | "/// \\returns A 256-bit floating-point vector of [8 x float] containing the\n" |
| 13165 | "/// concatenated result.\n" |
| 13166 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 13167 | "_mm256_set_m128 (__m128 __hi, __m128 __lo)\n" |
| 13168 | "{\n" |
| 13169 | " return (__m256) __builtin_shufflevector((__v4sf)__lo, (__v4sf)__hi, 0, 1, 2, 3, 4, 5, 6, 7);\n" |
| 13170 | "}\n" |
| 13171 | "\n" |
| 13172 | "/// Constructs a 256-bit floating-point vector of [4 x double] by\n" |
| 13173 | "/// concatenating two 128-bit floating-point vectors of [2 x double].\n" |
| 13174 | "///\n" |
| 13175 | "/// \\headerfile <x86intrin.h>\n" |
| 13176 | "///\n" |
| 13177 | "/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n" |
| 13178 | "///\n" |
| 13179 | "/// \\param __hi\n" |
| 13180 | "/// A 128-bit floating-point vector of [2 x double] to be copied to the upper\n" |
| 13181 | "/// 128 bits of the result.\n" |
| 13182 | "/// \\param __lo\n" |
| 13183 | "/// A 128-bit floating-point vector of [2 x double] to be copied to the lower\n" |
| 13184 | "/// 128 bits of the result.\n" |
| 13185 | "/// \\returns A 256-bit floating-point vector of [4 x double] containing the\n" |
| 13186 | "/// concatenated result.\n" |
| 13187 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 13188 | "_mm256_set_m128d (__m128d __hi, __m128d __lo)\n" |
| 13189 | "{\n" |
| 13190 | " return (__m256d) __builtin_shufflevector((__v2df)__lo, (__v2df)__hi, 0, 1, 2, 3);\n" |
| 13191 | "}\n" |
| 13192 | "\n" |
| 13193 | "/// Constructs a 256-bit integer vector by concatenating two 128-bit\n" |
| 13194 | "/// integer vectors.\n" |
| 13195 | "///\n" |
| 13196 | "/// \\headerfile <x86intrin.h>\n" |
| 13197 | "///\n" |
| 13198 | "/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n" |
| 13199 | "///\n" |
| 13200 | "/// \\param __hi\n" |
| 13201 | "/// A 128-bit integer vector to be copied to the upper 128 bits of the\n" |
| 13202 | "/// result.\n" |
| 13203 | "/// \\param __lo\n" |
| 13204 | "/// A 128-bit integer vector to be copied to the lower 128 bits of the\n" |
| 13205 | "/// result.\n" |
| 13206 | "/// \\returns A 256-bit integer vector containing the concatenated result.\n" |
| 13207 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
| 13208 | "_mm256_set_m128i (__m128i __hi, __m128i __lo)\n" |
| 13209 | "{\n" |
| 13210 | " return (__m256i) __builtin_shufflevector((__v2di)__lo, (__v2di)__hi, 0, 1, 2, 3);\n" |
| 13211 | "}\n" |
| 13212 | "\n" |
| 13213 | "/// Constructs a 256-bit floating-point vector of [8 x float] by\n" |
| 13214 | "/// concatenating two 128-bit floating-point vectors of [4 x float]. This is\n" |
| 13215 | "/// similar to _mm256_set_m128, but the order of the input parameters is\n" |
| 13216 | "/// swapped.\n" |
| 13217 | "///\n" |
| 13218 | "/// \\headerfile <x86intrin.h>\n" |
| 13219 | "///\n" |
| 13220 | "/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n" |
| 13221 | "///\n" |
| 13222 | "/// \\param __lo\n" |
| 13223 | "/// A 128-bit floating-point vector of [4 x float] to be copied to the lower\n" |
| 13224 | "/// 128 bits of the result.\n" |
| 13225 | "/// \\param __hi\n" |
| 13226 | "/// A 128-bit floating-point vector of [4 x float] to be copied to the upper\n" |
| 13227 | "/// 128 bits of the result.\n" |
| 13228 | "/// \\returns A 256-bit floating-point vector of [8 x float] containing the\n" |
| 13229 | "/// concatenated result.\n" |
| 13230 | "static __inline __m256 __DEFAULT_FN_ATTRS\n" |
| 13231 | "_mm256_setr_m128 (__m128 __lo, __m128 __hi)\n" |
| 13232 | "{\n" |
| 13233 | " return _mm256_set_m128(__hi, __lo);\n" |
| 13234 | "}\n" |
| 13235 | "\n" |
| 13236 | "/// Constructs a 256-bit floating-point vector of [4 x double] by\n" |
| 13237 | "/// concatenating two 128-bit floating-point vectors of [2 x double]. This is\n" |
| 13238 | "/// similar to _mm256_set_m128d, but the order of the input parameters is\n" |
| 13239 | "/// swapped.\n" |
| 13240 | "///\n" |
| 13241 | "/// \\headerfile <x86intrin.h>\n" |
| 13242 | "///\n" |
| 13243 | "/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n" |
| 13244 | "///\n" |
| 13245 | "/// \\param __lo\n" |
| 13246 | "/// A 128-bit floating-point vector of [2 x double] to be copied to the lower\n" |
| 13247 | "/// 128 bits of the result.\n" |
| 13248 | "/// \\param __hi\n" |
| 13249 | "/// A 128-bit floating-point vector of [2 x double] to be copied to the upper\n" |
| 13250 | "/// 128 bits of the result.\n" |
| 13251 | "/// \\returns A 256-bit floating-point vector of [4 x double] containing the\n" |
| 13252 | "/// concatenated result.\n" |
| 13253 | "static __inline __m256d __DEFAULT_FN_ATTRS\n" |
| 13254 | "_mm256_setr_m128d (__m128d __lo, __m128d __hi)\n" |
| 13255 | "{\n" |
| 13256 | " return (__m256d)_mm256_set_m128d(__hi, __lo);\n" |
| 13257 | "}\n" |
| 13258 | "\n" |
| 13259 | "/// Constructs a 256-bit integer vector by concatenating two 128-bit\n" |
| 13260 | "/// integer vectors. This is similar to _mm256_set_m128i, but the order of\n" |
| 13261 | "/// the input parameters is swapped.\n" |
| 13262 | "///\n" |
| 13263 | "/// \\headerfile <x86intrin.h>\n" |
| 13264 | "///\n" |
| 13265 | "/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.\n" |
| 13266 | "///\n" |
| 13267 | "/// \\param __lo\n" |
| 13268 | "/// A 128-bit integer vector to be copied to the lower 128 bits of the\n" |
| 13269 | "/// result.\n" |
| 13270 | "/// \\param __hi\n" |
| 13271 | "/// A 128-bit integer vector to be copied to the upper 128 bits of the\n" |
| 13272 | "/// result.\n" |
| 13273 | "/// \\returns A 256-bit integer vector containing the concatenated result.\n" |
| 13274 | "static __inline __m256i __DEFAULT_FN_ATTRS\n" |
| 13275 | "_mm256_setr_m128i (__m128i __lo, __m128i __hi)\n" |
| 13276 | "{\n" |
| 13277 | " return (__m256i)_mm256_set_m128i(__hi, __lo);\n" |
| 13278 | "}\n" |
| 13279 | "\n" |
| 13280 | "#undef __DEFAULT_FN_ATTRS\n" |
| 13281 | "#undef __DEFAULT_FN_ATTRS128\n" |
| 13282 | "\n" |
| 13283 | "#endif /* __AVXINTRIN_H */\n" |
| 13284 | "" } , |
| 13285 | { "/builtins/bmi2intrin.h" , "/*===---- bmi2intrin.h - BMI2 intrinsics -----------------------------------===\n" |
| 13286 | " *\n" |
| 13287 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 13288 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 13289 | " * in the Software without restriction, including without limitation the rights\n" |
| 13290 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 13291 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 13292 | " * furnished to do so, subject to the following conditions:\n" |
| 13293 | " *\n" |
| 13294 | " * The above copyright notice and this permission notice shall be included in\n" |
| 13295 | " * all copies or substantial portions of the Software.\n" |
| 13296 | " *\n" |
| 13297 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 13298 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 13299 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 13300 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 13301 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 13302 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 13303 | " * THE SOFTWARE.\n" |
| 13304 | " *\n" |
| 13305 | " *===-----------------------------------------------------------------------===\n" |
| 13306 | " */\n" |
| 13307 | "\n" |
| 13308 | "#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n" |
| 13309 | "#error \"Never use <bmi2intrin.h> directly; include <x86intrin.h> instead.\"\n" |
| 13310 | "#endif\n" |
| 13311 | "\n" |
| 13312 | "#ifndef __BMI2INTRIN_H\n" |
| 13313 | "#define __BMI2INTRIN_H\n" |
| 13314 | "\n" |
| 13315 | "/* Define the default attributes for the functions in this file. */\n" |
| 13316 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"bmi2\")))\n" |
| 13317 | "\n" |
| 13318 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
| 13319 | "_bzhi_u32(unsigned int __X, unsigned int __Y)\n" |
| 13320 | "{\n" |
| 13321 | " return __builtin_ia32_bzhi_si(__X, __Y);\n" |
| 13322 | "}\n" |
| 13323 | "\n" |
| 13324 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
| 13325 | "_pdep_u32(unsigned int __X, unsigned int __Y)\n" |
| 13326 | "{\n" |
| 13327 | " return __builtin_ia32_pdep_si(__X, __Y);\n" |
| 13328 | "}\n" |
| 13329 | "\n" |
| 13330 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
| 13331 | "_pext_u32(unsigned int __X, unsigned int __Y)\n" |
| 13332 | "{\n" |
| 13333 | " return __builtin_ia32_pext_si(__X, __Y);\n" |
| 13334 | "}\n" |
| 13335 | "\n" |
| 13336 | "#ifdef __x86_64__\n" |
| 13337 | "\n" |
| 13338 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
| 13339 | "_bzhi_u64(unsigned long long __X, unsigned long long __Y)\n" |
| 13340 | "{\n" |
| 13341 | " return __builtin_ia32_bzhi_di(__X, __Y);\n" |
| 13342 | "}\n" |
| 13343 | "\n" |
| 13344 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
| 13345 | "_pdep_u64(unsigned long long __X, unsigned long long __Y)\n" |
| 13346 | "{\n" |
| 13347 | " return __builtin_ia32_pdep_di(__X, __Y);\n" |
| 13348 | "}\n" |
| 13349 | "\n" |
| 13350 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
| 13351 | "_pext_u64(unsigned long long __X, unsigned long long __Y)\n" |
| 13352 | "{\n" |
| 13353 | " return __builtin_ia32_pext_di(__X, __Y);\n" |
| 13354 | "}\n" |
| 13355 | "\n" |
| 13356 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
| 13357 | "_mulx_u64 (unsigned long long __X, unsigned long long __Y,\n" |
| 13358 | " unsigned long long *__P)\n" |
| 13359 | "{\n" |
| 13360 | " unsigned __int128 __res = (unsigned __int128) __X * __Y;\n" |
| 13361 | " *__P = (unsigned long long) (__res >> 64);\n" |
| 13362 | " return (unsigned long long) __res;\n" |
| 13363 | "}\n" |
| 13364 | "\n" |
| 13365 | "#else /* !__x86_64__ */\n" |
| 13366 | "\n" |
| 13367 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
| 13368 | "_mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P)\n" |
| 13369 | "{\n" |
| 13370 | " unsigned long long __res = (unsigned long long) __X * __Y;\n" |
| 13371 | " *__P = (unsigned int) (__res >> 32);\n" |
| 13372 | " return (unsigned int) __res;\n" |
| 13373 | "}\n" |
| 13374 | "\n" |
| 13375 | "#endif /* !__x86_64__ */\n" |
| 13376 | "\n" |
| 13377 | "#undef __DEFAULT_FN_ATTRS\n" |
| 13378 | "\n" |
| 13379 | "#endif /* __BMI2INTRIN_H */\n" |
| 13380 | "" } , |
| 13381 | { "/builtins/bmiintrin.h" , "/*===---- bmiintrin.h - BMI intrinsics -------------------------------------===\n" |
| 13382 | " *\n" |
| 13383 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 13384 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 13385 | " * in the Software without restriction, including without limitation the rights\n" |
| 13386 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 13387 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 13388 | " * furnished to do so, subject to the following conditions:\n" |
| 13389 | " *\n" |
| 13390 | " * The above copyright notice and this permission notice shall be included in\n" |
| 13391 | " * all copies or substantial portions of the Software.\n" |
| 13392 | " *\n" |
| 13393 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 13394 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 13395 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 13396 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 13397 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 13398 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 13399 | " * THE SOFTWARE.\n" |
| 13400 | " *\n" |
| 13401 | " *===-----------------------------------------------------------------------===\n" |
| 13402 | " */\n" |
| 13403 | "\n" |
| 13404 | "#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n" |
| 13405 | "#error \"Never use <bmiintrin.h> directly; include <x86intrin.h> instead.\"\n" |
| 13406 | "#endif\n" |
| 13407 | "\n" |
| 13408 | "#ifndef __BMIINTRIN_H\n" |
| 13409 | "#define __BMIINTRIN_H\n" |
| 13410 | "\n" |
| 13411 | "#define _tzcnt_u16(a) (__tzcnt_u16((a)))\n" |
| 13412 | "\n" |
| 13413 | "#define _andn_u32(a, b) (__andn_u32((a), (b)))\n" |
| 13414 | "\n" |
| 13415 | "/* _bextr_u32 != __bextr_u32 */\n" |
| 13416 | "#define _blsi_u32(a) (__blsi_u32((a)))\n" |
| 13417 | "\n" |
| 13418 | "#define _blsmsk_u32(a) (__blsmsk_u32((a)))\n" |
| 13419 | "\n" |
| 13420 | "#define _blsr_u32(a) (__blsr_u32((a)))\n" |
| 13421 | "\n" |
| 13422 | "#define _tzcnt_u32(a) (__tzcnt_u32((a)))\n" |
| 13423 | "\n" |
| 13424 | "/* Define the default attributes for the functions in this file. */\n" |
| 13425 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"bmi\")))\n" |
| 13426 | "\n" |
| 13427 | "/* Allow using the tzcnt intrinsics even for non-BMI targets. Since the TZCNT\n" |
| 13428 | " instruction behaves as BSF on non-BMI targets, there is code that expects\n" |
| 13429 | " to use it as a potentially faster version of BSF. */\n" |
| 13430 | "#define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__))\n" |
| 13431 | "\n" |
| 13432 | "/// Counts the number of trailing zero bits in the operand.\n" |
| 13433 | "///\n" |
| 13434 | "/// \\headerfile <x86intrin.h>\n" |
| 13435 | "///\n" |
| 13436 | "/// This intrinsic corresponds to the <c> TZCNT </c> instruction.\n" |
| 13437 | "///\n" |
| 13438 | "/// \\param __X\n" |
| 13439 | "/// An unsigned 16-bit integer whose trailing zeros are to be counted.\n" |
| 13440 | "/// \\returns An unsigned 16-bit integer containing the number of trailing zero\n" |
| 13441 | "/// bits in the operand.\n" |
| 13442 | "static __inline__ unsigned short __RELAXED_FN_ATTRS\n" |
| 13443 | "__tzcnt_u16(unsigned short __X)\n" |
| 13444 | "{\n" |
| 13445 | " return __X ? __builtin_ctzs(__X) : 16;\n" |
| 13446 | "}\n" |
| 13447 | "\n" |
| 13448 | "/// Performs a bitwise AND of the second operand with the one's\n" |
| 13449 | "/// complement of the first operand.\n" |
| 13450 | "///\n" |
| 13451 | "/// \\headerfile <x86intrin.h>\n" |
| 13452 | "///\n" |
| 13453 | "/// This intrinsic corresponds to the <c> ANDN </c> instruction.\n" |
| 13454 | "///\n" |
| 13455 | "/// \\param __X\n" |
| 13456 | "/// An unsigned integer containing one of the operands.\n" |
| 13457 | "/// \\param __Y\n" |
| 13458 | "/// An unsigned integer containing one of the operands.\n" |
| 13459 | "/// \\returns An unsigned integer containing the bitwise AND of the second\n" |
| 13460 | "/// operand with the one's complement of the first operand.\n" |
| 13461 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
| 13462 | "__andn_u32(unsigned int __X, unsigned int __Y)\n" |
| 13463 | "{\n" |
| 13464 | " return ~__X & __Y;\n" |
| 13465 | "}\n" |
| 13466 | "\n" |
| 13467 | "/* AMD-specified, double-leading-underscore version of BEXTR */\n" |
| 13468 | "/// Extracts the specified bits from the first operand and returns them\n" |
| 13469 | "/// in the least significant bits of the result.\n" |
| 13470 | "///\n" |
| 13471 | "/// \\headerfile <x86intrin.h>\n" |
| 13472 | "///\n" |
| 13473 | "/// This intrinsic corresponds to the <c> BEXTR </c> instruction.\n" |
| 13474 | "///\n" |
| 13475 | "/// \\param __X\n" |
| 13476 | "/// An unsigned integer whose bits are to be extracted.\n" |
| 13477 | "/// \\param __Y\n" |
| 13478 | "/// An unsigned integer used to specify which bits are extracted. Bits [7:0]\n" |
| 13479 | "/// specify the index of the least significant bit. Bits [15:8] specify the\n" |
| 13480 | "/// number of bits to be extracted.\n" |
| 13481 | "/// \\returns An unsigned integer whose least significant bits contain the\n" |
| 13482 | "/// extracted bits.\n" |
| 13483 | "/// \\see _bextr_u32\n" |
| 13484 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
| 13485 | "__bextr_u32(unsigned int __X, unsigned int __Y)\n" |
| 13486 | "{\n" |
| 13487 | " return __builtin_ia32_bextr_u32(__X, __Y);\n" |
| 13488 | "}\n" |
| 13489 | "\n" |
| 13490 | "/* Intel-specified, single-leading-underscore version of BEXTR */\n" |
| 13491 | "/// Extracts the specified bits from the first operand and returns them\n" |
| 13492 | "/// in the least significant bits of the result.\n" |
| 13493 | "///\n" |
| 13494 | "/// \\headerfile <x86intrin.h>\n" |
| 13495 | "///\n" |
| 13496 | "/// This intrinsic corresponds to the <c> BEXTR </c> instruction.\n" |
| 13497 | "///\n" |
| 13498 | "/// \\param __X\n" |
| 13499 | "/// An unsigned integer whose bits are to be extracted.\n" |
| 13500 | "/// \\param __Y\n" |
| 13501 | "/// An unsigned integer used to specify the index of the least significant\n" |
| 13502 | "/// bit for the bits to be extracted. Bits [7:0] specify the index.\n" |
| 13503 | "/// \\param __Z\n" |
| 13504 | "/// An unsigned integer used to specify the number of bits to be extracted.\n" |
| 13505 | "/// Bits [7:0] specify the number of bits.\n" |
| 13506 | "/// \\returns An unsigned integer whose least significant bits contain the\n" |
| 13507 | "/// extracted bits.\n" |
| 13508 | "/// \\see __bextr_u32\n" |
| 13509 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
| 13510 | "_bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z)\n" |
| 13511 | "{\n" |
| 13512 | " return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));\n" |
| 13513 | "}\n" |
| 13514 | "\n" |
| 13515 | "/// Clears all bits in the source except for the least significant bit\n" |
| 13516 | "/// containing a value of 1 and returns the result.\n" |
| 13517 | "///\n" |
| 13518 | "/// \\headerfile <x86intrin.h>\n" |
| 13519 | "///\n" |
| 13520 | "/// This intrinsic corresponds to the <c> BLSI </c> instruction.\n" |
| 13521 | "///\n" |
| 13522 | "/// \\param __X\n" |
| 13523 | "/// An unsigned integer whose bits are to be cleared.\n" |
| 13524 | "/// \\returns An unsigned integer containing the result of clearing the bits from\n" |
| 13525 | "/// the source operand.\n" |
| 13526 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
| 13527 | "__blsi_u32(unsigned int __X)\n" |
| 13528 | "{\n" |
| 13529 | " return __X & -__X;\n" |
| 13530 | "}\n" |
| 13531 | "\n" |
| 13532 | "/// Creates a mask whose bits are set to 1, using bit 0 up to and\n" |
| 13533 | "/// including the least significant bit that is set to 1 in the source\n" |
| 13534 | "/// operand and returns the result.\n" |
| 13535 | "///\n" |
| 13536 | "/// \\headerfile <x86intrin.h>\n" |
| 13537 | "///\n" |
| 13538 | "/// This intrinsic corresponds to the <c> BLSMSK </c> instruction.\n" |
| 13539 | "///\n" |
| 13540 | "/// \\param __X\n" |
| 13541 | "/// An unsigned integer used to create the mask.\n" |
| 13542 | "/// \\returns An unsigned integer containing the newly created mask.\n" |
| 13543 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
| 13544 | "__blsmsk_u32(unsigned int __X)\n" |
| 13545 | "{\n" |
| 13546 | " return __X ^ (__X - 1);\n" |
| 13547 | "}\n" |
| 13548 | "\n" |
| 13549 | "/// Clears the least significant bit that is set to 1 in the source\n" |
| 13550 | "/// operand and returns the result.\n" |
| 13551 | "///\n" |
| 13552 | "/// \\headerfile <x86intrin.h>\n" |
| 13553 | "///\n" |
| 13554 | "/// This intrinsic corresponds to the <c> BLSR </c> instruction.\n" |
| 13555 | "///\n" |
| 13556 | "/// \\param __X\n" |
| 13557 | "/// An unsigned integer containing the operand to be cleared.\n" |
| 13558 | "/// \\returns An unsigned integer containing the result of clearing the source\n" |
| 13559 | "/// operand.\n" |
| 13560 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
| 13561 | "__blsr_u32(unsigned int __X)\n" |
| 13562 | "{\n" |
| 13563 | " return __X & (__X - 1);\n" |
| 13564 | "}\n" |
| 13565 | "\n" |
| 13566 | "/// Counts the number of trailing zero bits in the operand.\n" |
| 13567 | "///\n" |
| 13568 | "/// \\headerfile <x86intrin.h>\n" |
| 13569 | "///\n" |
| 13570 | "/// This intrinsic corresponds to the <c> TZCNT </c> instruction.\n" |
| 13571 | "///\n" |
| 13572 | "/// \\param __X\n" |
| 13573 | "/// An unsigned 32-bit integer whose trailing zeros are to be counted.\n" |
| 13574 | "/// \\returns An unsigned 32-bit integer containing the number of trailing zero\n" |
| 13575 | "/// bits in the operand.\n" |
| 13576 | "static __inline__ unsigned int __RELAXED_FN_ATTRS\n" |
| 13577 | "__tzcnt_u32(unsigned int __X)\n" |
| 13578 | "{\n" |
| 13579 | " return __X ? __builtin_ctz(__X) : 32;\n" |
| 13580 | "}\n" |
| 13581 | "\n" |
| 13582 | "/// Counts the number of trailing zero bits in the operand.\n" |
| 13583 | "///\n" |
| 13584 | "/// \\headerfile <x86intrin.h>\n" |
| 13585 | "///\n" |
| 13586 | "/// This intrinsic corresponds to the <c> TZCNT </c> instruction.\n" |
| 13587 | "///\n" |
| 13588 | "/// \\param __X\n" |
| 13589 | "/// An unsigned 32-bit integer whose trailing zeros are to be counted.\n" |
| 13590 | "/// \\returns An 32-bit integer containing the number of trailing zero bits in\n" |
| 13591 | "/// the operand.\n" |
| 13592 | "static __inline__ int __RELAXED_FN_ATTRS\n" |
| 13593 | "_mm_tzcnt_32(unsigned int __X)\n" |
| 13594 | "{\n" |
| 13595 | " return __X ? __builtin_ctz(__X) : 32;\n" |
| 13596 | "}\n" |
| 13597 | "\n" |
| 13598 | "#ifdef __x86_64__\n" |
| 13599 | "\n" |
| 13600 | "#define _andn_u64(a, b) (__andn_u64((a), (b)))\n" |
| 13601 | "\n" |
| 13602 | "/* _bextr_u64 != __bextr_u64 */\n" |
| 13603 | "#define _blsi_u64(a) (__blsi_u64((a)))\n" |
| 13604 | "\n" |
| 13605 | "#define _blsmsk_u64(a) (__blsmsk_u64((a)))\n" |
| 13606 | "\n" |
| 13607 | "#define _blsr_u64(a) (__blsr_u64((a)))\n" |
| 13608 | "\n" |
| 13609 | "#define _tzcnt_u64(a) (__tzcnt_u64((a)))\n" |
| 13610 | "\n" |
| 13611 | "/// Performs a bitwise AND of the second operand with the one's\n" |
| 13612 | "/// complement of the first operand.\n" |
| 13613 | "///\n" |
| 13614 | "/// \\headerfile <x86intrin.h>\n" |
| 13615 | "///\n" |
| 13616 | "/// This intrinsic corresponds to the <c> ANDN </c> instruction.\n" |
| 13617 | "///\n" |
| 13618 | "/// \\param __X\n" |
| 13619 | "/// An unsigned 64-bit integer containing one of the operands.\n" |
| 13620 | "/// \\param __Y\n" |
| 13621 | "/// An unsigned 64-bit integer containing one of the operands.\n" |
| 13622 | "/// \\returns An unsigned 64-bit integer containing the bitwise AND of the second\n" |
| 13623 | "/// operand with the one's complement of the first operand.\n" |
| 13624 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
| 13625 | "__andn_u64 (unsigned long long __X, unsigned long long __Y)\n" |
| 13626 | "{\n" |
| 13627 | " return ~__X & __Y;\n" |
| 13628 | "}\n" |
| 13629 | "\n" |
| 13630 | "/* AMD-specified, double-leading-underscore version of BEXTR */\n" |
| 13631 | "/// Extracts the specified bits from the first operand and returns them\n" |
| 13632 | "/// in the least significant bits of the result.\n" |
| 13633 | "///\n" |
| 13634 | "/// \\headerfile <x86intrin.h>\n" |
| 13635 | "///\n" |
| 13636 | "/// This intrinsic corresponds to the <c> BEXTR </c> instruction.\n" |
| 13637 | "///\n" |
| 13638 | "/// \\param __X\n" |
| 13639 | "/// An unsigned 64-bit integer whose bits are to be extracted.\n" |
| 13640 | "/// \\param __Y\n" |
| 13641 | "/// An unsigned 64-bit integer used to specify which bits are extracted. Bits\n" |
| 13642 | "/// [7:0] specify the index of the least significant bit. Bits [15:8] specify\n" |
| 13643 | "/// the number of bits to be extracted.\n" |
| 13644 | "/// \\returns An unsigned 64-bit integer whose least significant bits contain the\n" |
| 13645 | "/// extracted bits.\n" |
| 13646 | "/// \\see _bextr_u64\n" |
| 13647 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
| 13648 | "__bextr_u64(unsigned long long __X, unsigned long long __Y)\n" |
| 13649 | "{\n" |
| 13650 | " return __builtin_ia32_bextr_u64(__X, __Y);\n" |
| 13651 | "}\n" |
| 13652 | "\n" |
| 13653 | "/* Intel-specified, single-leading-underscore version of BEXTR */\n" |
| 13654 | "/// Extracts the specified bits from the first operand and returns them\n" |
| 13655 | "/// in the least significant bits of the result.\n" |
| 13656 | "///\n" |
| 13657 | "/// \\headerfile <x86intrin.h>\n" |
| 13658 | "///\n" |
| 13659 | "/// This intrinsic corresponds to the <c> BEXTR </c> instruction.\n" |
| 13660 | "///\n" |
| 13661 | "/// \\param __X\n" |
| 13662 | "/// An unsigned 64-bit integer whose bits are to be extracted.\n" |
| 13663 | "/// \\param __Y\n" |
| 13664 | "/// An unsigned integer used to specify the index of the least significant\n" |
| 13665 | "/// bit for the bits to be extracted. Bits [7:0] specify the index.\n" |
| 13666 | "/// \\param __Z\n" |
| 13667 | "/// An unsigned integer used to specify the number of bits to be extracted.\n" |
| 13668 | "/// Bits [7:0] specify the number of bits.\n" |
| 13669 | "/// \\returns An unsigned 64-bit integer whose least significant bits contain the\n" |
| 13670 | "/// extracted bits.\n" |
| 13671 | "/// \\see __bextr_u64\n" |
| 13672 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
| 13673 | "_bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z)\n" |
| 13674 | "{\n" |
| 13675 | " return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));\n" |
| 13676 | "}\n" |
| 13677 | "\n" |
| 13678 | "/// Clears all bits in the source except for the least significant bit\n" |
| 13679 | "/// containing a value of 1 and returns the result.\n" |
| 13680 | "///\n" |
| 13681 | "/// \\headerfile <x86intrin.h>\n" |
| 13682 | "///\n" |
| 13683 | "/// This intrinsic corresponds to the <c> BLSI </c> instruction.\n" |
| 13684 | "///\n" |
| 13685 | "/// \\param __X\n" |
| 13686 | "/// An unsigned 64-bit integer whose bits are to be cleared.\n" |
| 13687 | "/// \\returns An unsigned 64-bit integer containing the result of clearing the\n" |
| 13688 | "/// bits from the source operand.\n" |
| 13689 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
| 13690 | "__blsi_u64(unsigned long long __X)\n" |
| 13691 | "{\n" |
| 13692 | " return __X & -__X;\n" |
| 13693 | "}\n" |
| 13694 | "\n" |
| 13695 | "/// Creates a mask whose bits are set to 1, using bit 0 up to and\n" |
| 13696 | "/// including the least significant bit that is set to 1 in the source\n" |
| 13697 | "/// operand and returns the result.\n" |
| 13698 | "///\n" |
| 13699 | "/// \\headerfile <x86intrin.h>\n" |
| 13700 | "///\n" |
| 13701 | "/// This intrinsic corresponds to the <c> BLSMSK </c> instruction.\n" |
| 13702 | "///\n" |
| 13703 | "/// \\param __X\n" |
| 13704 | "/// An unsigned 64-bit integer used to create the mask.\n" |
| 13705 | "/// \\returns An unsigned 64-bit integer containing the newly created mask.\n" |
| 13706 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
| 13707 | "__blsmsk_u64(unsigned long long __X)\n" |
| 13708 | "{\n" |
| 13709 | " return __X ^ (__X - 1);\n" |
| 13710 | "}\n" |
| 13711 | "\n" |
| 13712 | "/// Clears the least significant bit that is set to 1 in the source\n" |
| 13713 | "/// operand and returns the result.\n" |
| 13714 | "///\n" |
| 13715 | "/// \\headerfile <x86intrin.h>\n" |
| 13716 | "///\n" |
| 13717 | "/// This intrinsic corresponds to the <c> BLSR </c> instruction.\n" |
| 13718 | "///\n" |
| 13719 | "/// \\param __X\n" |
| 13720 | "/// An unsigned 64-bit integer containing the operand to be cleared.\n" |
| 13721 | "/// \\returns An unsigned 64-bit integer containing the result of clearing the\n" |
| 13722 | "/// source operand.\n" |
| 13723 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
| 13724 | "__blsr_u64(unsigned long long __X)\n" |
| 13725 | "{\n" |
| 13726 | " return __X & (__X - 1);\n" |
| 13727 | "}\n" |
| 13728 | "\n" |
| 13729 | "/// Counts the number of trailing zero bits in the operand.\n" |
| 13730 | "///\n" |
| 13731 | "/// \\headerfile <x86intrin.h>\n" |
| 13732 | "///\n" |
| 13733 | "/// This intrinsic corresponds to the <c> TZCNT </c> instruction.\n" |
| 13734 | "///\n" |
| 13735 | "/// \\param __X\n" |
| 13736 | "/// An unsigned 64-bit integer whose trailing zeros are to be counted.\n" |
| 13737 | "/// \\returns An unsigned 64-bit integer containing the number of trailing zero\n" |
| 13738 | "/// bits in the operand.\n" |
| 13739 | "static __inline__ unsigned long long __RELAXED_FN_ATTRS\n" |
| 13740 | "__tzcnt_u64(unsigned long long __X)\n" |
| 13741 | "{\n" |
| 13742 | " return __X ? __builtin_ctzll(__X) : 64;\n" |
| 13743 | "}\n" |
| 13744 | "\n" |
| 13745 | "/// Counts the number of trailing zero bits in the operand.\n" |
| 13746 | "///\n" |
| 13747 | "/// \\headerfile <x86intrin.h>\n" |
| 13748 | "///\n" |
| 13749 | "/// This intrinsic corresponds to the <c> TZCNT </c> instruction.\n" |
| 13750 | "///\n" |
| 13751 | "/// \\param __X\n" |
| 13752 | "/// An unsigned 64-bit integer whose trailing zeros are to be counted.\n" |
| 13753 | "/// \\returns An 64-bit integer containing the number of trailing zero bits in\n" |
| 13754 | "/// the operand.\n" |
| 13755 | "static __inline__ long long __RELAXED_FN_ATTRS\n" |
| 13756 | "_mm_tzcnt_64(unsigned long long __X)\n" |
| 13757 | "{\n" |
| 13758 | " return __X ? __builtin_ctzll(__X) : 64;\n" |
| 13759 | "}\n" |
| 13760 | "\n" |
| 13761 | "#endif /* __x86_64__ */\n" |
| 13762 | "\n" |
| 13763 | "#undef __DEFAULT_FN_ATTRS\n" |
| 13764 | "#undef __RELAXED_FN_ATTRS\n" |
| 13765 | "\n" |
| 13766 | "#endif /* __BMIINTRIN_H */\n" |
| 13767 | "" } , |
| 13768 | { "/builtins/cetintrin.h" , "/*===---- cetintrin.h - CET intrinsic --------------------------------------===\n" |
| 13769 | " *\n" |
| 13770 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 13771 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 13772 | " * in the Software without restriction, including without limitation the rights\n" |
| 13773 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 13774 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 13775 | " * furnished to do so, subject to the following conditions:\n" |
| 13776 | " *\n" |
| 13777 | " * The above copyright notice and this permission notice shall be included in\n" |
| 13778 | " * all copies or substantial portions of the Software.\n" |
| 13779 | " *\n" |
| 13780 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 13781 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 13782 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 13783 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 13784 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 13785 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 13786 | " * THE SOFTWARE.\n" |
| 13787 | " *\n" |
| 13788 | " *===-----------------------------------------------------------------------===\n" |
| 13789 | " */\n" |
| 13790 | "\n" |
| 13791 | "#ifndef __IMMINTRIN_H\n" |
| 13792 | "#error \"Never use <cetintrin.h> directly; include <immintrin.h> instead.\"\n" |
| 13793 | "#endif\n" |
| 13794 | "\n" |
| 13795 | "#ifndef __CETINTRIN_H\n" |
| 13796 | "#define __CETINTRIN_H\n" |
| 13797 | "\n" |
| 13798 | "/* Define the default attributes for the functions in this file. */\n" |
| 13799 | "#define __DEFAULT_FN_ATTRS \\\n" |
| 13800 | " __attribute__((__always_inline__, __nodebug__, __target__(\"shstk\")))\n" |
| 13801 | "\n" |
| 13802 | "static __inline__ void __DEFAULT_FN_ATTRS _incsspd(int __a) {\n" |
| 13803 | " __builtin_ia32_incsspd(__a);\n" |
| 13804 | "}\n" |
| 13805 | "\n" |
| 13806 | "#ifdef __x86_64__\n" |
| 13807 | "static __inline__ void __DEFAULT_FN_ATTRS _incsspq(unsigned long long __a) {\n" |
| 13808 | " __builtin_ia32_incsspq(__a);\n" |
| 13809 | "}\n" |
| 13810 | "#endif /* __x86_64__ */\n" |
| 13811 | "\n" |
| 13812 | "#ifdef __x86_64__\n" |
| 13813 | "static __inline__ void __DEFAULT_FN_ATTRS _inc_ssp(unsigned int __a) {\n" |
| 13814 | " __builtin_ia32_incsspq(__a);\n" |
| 13815 | "}\n" |
| 13816 | "#else /* __x86_64__ */\n" |
| 13817 | "static __inline__ void __DEFAULT_FN_ATTRS _inc_ssp(unsigned int __a) {\n" |
| 13818 | " __builtin_ia32_incsspd((int)__a);\n" |
| 13819 | "}\n" |
| 13820 | "#endif /* __x86_64__ */\n" |
| 13821 | "\n" |
| 13822 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS _rdsspd(unsigned int __a) {\n" |
| 13823 | " return __builtin_ia32_rdsspd(__a);\n" |
| 13824 | "}\n" |
| 13825 | "\n" |
| 13826 | "#ifdef __x86_64__\n" |
| 13827 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS _rdsspq(unsigned long long __a) {\n" |
| 13828 | " return __builtin_ia32_rdsspq(__a);\n" |
| 13829 | "}\n" |
| 13830 | "#endif /* __x86_64__ */\n" |
| 13831 | "\n" |
| 13832 | "#ifdef __x86_64__\n" |
| 13833 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS _get_ssp(void) {\n" |
| 13834 | " return __builtin_ia32_rdsspq(0);\n" |
| 13835 | "}\n" |
| 13836 | "#else /* __x86_64__ */\n" |
| 13837 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS _get_ssp(void) {\n" |
| 13838 | " return __builtin_ia32_rdsspd(0);\n" |
| 13839 | "}\n" |
| 13840 | "#endif /* __x86_64__ */\n" |
| 13841 | "\n" |
| 13842 | "static __inline__ void __DEFAULT_FN_ATTRS _saveprevssp() {\n" |
| 13843 | " __builtin_ia32_saveprevssp();\n" |
| 13844 | "}\n" |
| 13845 | "\n" |
| 13846 | "static __inline__ void __DEFAULT_FN_ATTRS _rstorssp(void * __p) {\n" |
| 13847 | " __builtin_ia32_rstorssp(__p);\n" |
| 13848 | "}\n" |
| 13849 | "\n" |
| 13850 | "static __inline__ void __DEFAULT_FN_ATTRS _wrssd(unsigned int __a, void * __p) {\n" |
| 13851 | " __builtin_ia32_wrssd(__a, __p);\n" |
| 13852 | "}\n" |
| 13853 | "\n" |
| 13854 | "#ifdef __x86_64__\n" |
| 13855 | "static __inline__ void __DEFAULT_FN_ATTRS _wrssq(unsigned long long __a, void * __p) {\n" |
| 13856 | " __builtin_ia32_wrssq(__a, __p);\n" |
| 13857 | "}\n" |
| 13858 | "#endif /* __x86_64__ */\n" |
| 13859 | "\n" |
| 13860 | "static __inline__ void __DEFAULT_FN_ATTRS _wrussd(unsigned int __a, void * __p) {\n" |
| 13861 | " __builtin_ia32_wrussd(__a, __p);\n" |
| 13862 | "}\n" |
| 13863 | "\n" |
| 13864 | "#ifdef __x86_64__\n" |
| 13865 | "static __inline__ void __DEFAULT_FN_ATTRS _wrussq(unsigned long long __a, void * __p) {\n" |
| 13866 | " __builtin_ia32_wrussq(__a, __p);\n" |
| 13867 | "}\n" |
| 13868 | "#endif /* __x86_64__ */\n" |
| 13869 | "\n" |
| 13870 | "static __inline__ void __DEFAULT_FN_ATTRS _setssbsy() {\n" |
| 13871 | " __builtin_ia32_setssbsy();\n" |
| 13872 | "}\n" |
| 13873 | "\n" |
| 13874 | "static __inline__ void __DEFAULT_FN_ATTRS _clrssbsy(void * __p) {\n" |
| 13875 | " __builtin_ia32_clrssbsy(__p);\n" |
| 13876 | "}\n" |
| 13877 | "\n" |
| 13878 | "#undef __DEFAULT_FN_ATTRS\n" |
| 13879 | "\n" |
| 13880 | "#endif /* __CETINTRIN_H */\n" |
| 13881 | "" } , |
| 13882 | { "/builtins/cldemoteintrin.h" , "/*===---- cldemoteintrin.h - CLDEMOTE intrinsic ----------------------------===\n" |
| 13883 | " *\n" |
| 13884 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 13885 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 13886 | " * in the Software without restriction, including without limitation the rights\n" |
| 13887 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 13888 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 13889 | " * furnished to do so, subject to the following conditions:\n" |
| 13890 | " *\n" |
| 13891 | " * The above copyright notice and this permission notice shall be included in\n" |
| 13892 | " * all copies or substantial portions of the Software.\n" |
| 13893 | " *\n" |
| 13894 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 13895 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 13896 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 13897 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 13898 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 13899 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 13900 | " * THE SOFTWARE.\n" |
| 13901 | " *\n" |
| 13902 | " *===-----------------------------------------------------------------------===\n" |
| 13903 | " */\n" |
| 13904 | "\n" |
| 13905 | "#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n" |
| 13906 | "#error \"Never use <cldemoteintrin.h> directly; include <x86intrin.h> instead.\"\n" |
| 13907 | "#endif\n" |
| 13908 | "\n" |
| 13909 | "#ifndef __CLDEMOTEINTRIN_H\n" |
| 13910 | "#define __CLDEMOTEINTRIN_H\n" |
| 13911 | "\n" |
| 13912 | "/* Define the default attributes for the functions in this file. */\n" |
| 13913 | "#define __DEFAULT_FN_ATTRS \\\n" |
| 13914 | " __attribute__((__always_inline__, __nodebug__, __target__(\"cldemote\")))\n" |
| 13915 | "\n" |
| 13916 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 13917 | "_cldemote(const void * __P) {\n" |
| 13918 | " __builtin_ia32_cldemote(__P);\n" |
| 13919 | "}\n" |
| 13920 | "\n" |
| 13921 | "#undef __DEFAULT_FN_ATTRS\n" |
| 13922 | "\n" |
| 13923 | "#endif\n" |
| 13924 | "" } , |
| 13925 | { "/builtins/clflushoptintrin.h" , "/*===---- clflushoptintrin.h - CLFLUSHOPT intrinsic ------------------------===\n" |
| 13926 | " *\n" |
| 13927 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 13928 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 13929 | " * in the Software without restriction, including without limitation the rights\n" |
| 13930 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 13931 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 13932 | " * furnished to do so, subject to the following conditions:\n" |
| 13933 | " *\n" |
| 13934 | " * The above copyright notice and this permission notice shall be included in\n" |
| 13935 | " * all copies or substantial portions of the Software.\n" |
| 13936 | " *\n" |
| 13937 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 13938 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 13939 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 13940 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 13941 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 13942 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 13943 | " * THE SOFTWARE.\n" |
| 13944 | " *\n" |
| 13945 | " *===-----------------------------------------------------------------------===\n" |
| 13946 | " */\n" |
| 13947 | "\n" |
| 13948 | "#ifndef __IMMINTRIN_H\n" |
| 13949 | "#error \"Never use <clflushoptintrin.h> directly; include <immintrin.h> instead.\"\n" |
| 13950 | "#endif\n" |
| 13951 | "\n" |
| 13952 | "#ifndef __CLFLUSHOPTINTRIN_H\n" |
| 13953 | "#define __CLFLUSHOPTINTRIN_H\n" |
| 13954 | "\n" |
| 13955 | "/* Define the default attributes for the functions in this file. */\n" |
| 13956 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"clflushopt\")))\n" |
| 13957 | "\n" |
| 13958 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 13959 | "_mm_clflushopt(void const * __m) {\n" |
| 13960 | " __builtin_ia32_clflushopt(__m);\n" |
| 13961 | "}\n" |
| 13962 | "\n" |
| 13963 | "#undef __DEFAULT_FN_ATTRS\n" |
| 13964 | "\n" |
| 13965 | "#endif\n" |
| 13966 | "" } , |
| 13967 | { "/builtins/clwbintrin.h" , "/*===---- clwbintrin.h - CLWB intrinsic ------------------------------------===\n" |
| 13968 | " *\n" |
| 13969 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 13970 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 13971 | " * in the Software without restriction, including without limitation the rights\n" |
| 13972 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 13973 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 13974 | " * furnished to do so, subject to the following conditions:\n" |
| 13975 | " *\n" |
| 13976 | " * The above copyright notice and this permission notice shall be included in\n" |
| 13977 | " * all copies or substantial portions of the Software.\n" |
| 13978 | " *\n" |
| 13979 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 13980 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 13981 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 13982 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 13983 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 13984 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 13985 | " * THE SOFTWARE.\n" |
| 13986 | " *\n" |
| 13987 | " *===-----------------------------------------------------------------------===\n" |
| 13988 | " */\n" |
| 13989 | "\n" |
| 13990 | "#ifndef __IMMINTRIN_H\n" |
| 13991 | "#error \"Never use <clwbintrin.h> directly; include <immintrin.h> instead.\"\n" |
| 13992 | "#endif\n" |
| 13993 | "\n" |
| 13994 | "#ifndef __CLWBINTRIN_H\n" |
| 13995 | "#define __CLWBINTRIN_H\n" |
| 13996 | "\n" |
| 13997 | "/* Define the default attributes for the functions in this file. */\n" |
| 13998 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"clwb\")))\n" |
| 13999 | "\n" |
| 14000 | "/// Writes back to memory the cache line (if modified) that contains the\n" |
| 14001 | "/// linear address specified in \\a __p from any level of the cache hierarchy in\n" |
| 14002 | "/// the cache coherence domain\n" |
| 14003 | "///\n" |
| 14004 | "/// \\headerfile <immintrin.h>\n" |
| 14005 | "///\n" |
| 14006 | "/// This intrinsic corresponds to the <c> CLWB </c> instruction.\n" |
| 14007 | "///\n" |
| 14008 | "/// \\param __p\n" |
| 14009 | "/// A pointer to the memory location used to identify the cache line to be\n" |
| 14010 | "/// written back.\n" |
| 14011 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 14012 | "_mm_clwb(void const *__p) {\n" |
| 14013 | " __builtin_ia32_clwb(__p);\n" |
| 14014 | "}\n" |
| 14015 | "\n" |
| 14016 | "#undef __DEFAULT_FN_ATTRS\n" |
| 14017 | "\n" |
| 14018 | "#endif\n" |
| 14019 | "" } , |
| 14020 | { "/builtins/clzerointrin.h" , "/*===----------------------- clzerointrin.h - CLZERO ----------------------===\n" |
| 14021 | " *\n" |
| 14022 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 14023 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 14024 | " * in the Software without restriction, including without limitation the rights\n" |
| 14025 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 14026 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 14027 | " * furnished to do so, subject to the following conditions:\n" |
| 14028 | " *\n" |
| 14029 | " * The above copyright notice and this permission notice shall be included in\n" |
| 14030 | " * all copies or substantial portions of the Software.\n" |
| 14031 | " *\n" |
| 14032 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 14033 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 14034 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 14035 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 14036 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 14037 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 14038 | " * THE SOFTWARE.\n" |
| 14039 | " *\n" |
| 14040 | " *===-----------------------------------------------------------------------===\n" |
| 14041 | " */\n" |
| 14042 | "#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n" |
| 14043 | "#error \"Never use <clzerointrin.h> directly; include <x86intrin.h> instead.\"\n" |
| 14044 | "#endif\n" |
| 14045 | "\n" |
| 14046 | "#ifndef __CLZEROINTRIN_H\n" |
| 14047 | "#define __CLZEROINTRIN_H\n" |
| 14048 | "\n" |
| 14049 | "/* Define the default attributes for the functions in this file. */\n" |
| 14050 | "#define __DEFAULT_FN_ATTRS \\\n" |
| 14051 | " __attribute__((__always_inline__, __nodebug__, __target__(\"clzero\")))\n" |
| 14052 | "\n" |
| 14053 | "/// Loads the cache line address and zero's out the cacheline\n" |
| 14054 | "///\n" |
| 14055 | "/// \\headerfile <clzerointrin.h>\n" |
| 14056 | "///\n" |
| 14057 | "/// This intrinsic corresponds to the <c> CLZERO </c> instruction.\n" |
| 14058 | "///\n" |
| 14059 | "/// \\param __line\n" |
| 14060 | "/// A pointer to a cacheline which needs to be zeroed out.\n" |
| 14061 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 14062 | "_mm_clzero (void * __line)\n" |
| 14063 | "{\n" |
| 14064 | " __builtin_ia32_clzero ((void *)__line);\n" |
| 14065 | "}\n" |
| 14066 | "\n" |
| 14067 | "#undef __DEFAULT_FN_ATTRS\n" |
| 14068 | "\n" |
| 14069 | "#endif /* __CLZEROINTRIN_H */\n" |
| 14070 | "" } , |
| 14071 | { "/builtins/cpuid.h" , "/*===---- cpuid.h - X86 cpu model detection --------------------------------===\n" |
| 14072 | " *\n" |
| 14073 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 14074 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 14075 | " * in the Software without restriction, including without limitation the rights\n" |
| 14076 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 14077 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 14078 | " * furnished to do so, subject to the following conditions:\n" |
| 14079 | " *\n" |
| 14080 | " * The above copyright notice and this permission notice shall be included in\n" |
| 14081 | " * all copies or substantial portions of the Software.\n" |
| 14082 | " *\n" |
| 14083 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 14084 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 14085 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 14086 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 14087 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 14088 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 14089 | " * THE SOFTWARE.\n" |
| 14090 | " *\n" |
| 14091 | " *===-----------------------------------------------------------------------===\n" |
| 14092 | " */\n" |
| 14093 | "\n" |
| 14094 | "#if !(__x86_64__ || __i386__)\n" |
| 14095 | "#error this header is for x86 only\n" |
| 14096 | "#endif\n" |
| 14097 | "\n" |
| 14098 | "/* Responses identification request with %eax 0 */\n" |
| 14099 | "/* AMD: \"AuthenticAMD\" */\n" |
| 14100 | "#define signature_AMD_ebx 0x68747541\n" |
| 14101 | "#define signature_AMD_edx 0x69746e65\n" |
| 14102 | "#define signature_AMD_ecx 0x444d4163\n" |
| 14103 | "/* CENTAUR: \"CentaurHauls\" */\n" |
| 14104 | "#define signature_CENTAUR_ebx 0x746e6543\n" |
| 14105 | "#define signature_CENTAUR_edx 0x48727561\n" |
| 14106 | "#define signature_CENTAUR_ecx 0x736c7561\n" |
| 14107 | "/* CYRIX: \"CyrixInstead\" */\n" |
| 14108 | "#define signature_CYRIX_ebx 0x69727943\n" |
| 14109 | "#define signature_CYRIX_edx 0x736e4978\n" |
| 14110 | "#define signature_CYRIX_ecx 0x64616574\n" |
| 14111 | "/* INTEL: \"GenuineIntel\" */\n" |
| 14112 | "#define signature_INTEL_ebx 0x756e6547\n" |
| 14113 | "#define signature_INTEL_edx 0x49656e69\n" |
| 14114 | "#define signature_INTEL_ecx 0x6c65746e\n" |
| 14115 | "/* TM1: \"TransmetaCPU\" */\n" |
| 14116 | "#define signature_TM1_ebx 0x6e617254\n" |
| 14117 | "#define signature_TM1_edx 0x74656d73\n" |
| 14118 | "#define signature_TM1_ecx 0x55504361\n" |
| 14119 | "/* TM2: \"GenuineTMx86\" */\n" |
| 14120 | "#define signature_TM2_ebx 0x756e6547\n" |
| 14121 | "#define signature_TM2_edx 0x54656e69\n" |
| 14122 | "#define signature_TM2_ecx 0x3638784d\n" |
| 14123 | "/* NSC: \"Geode by NSC\" */\n" |
| 14124 | "#define signature_NSC_ebx 0x646f6547\n" |
| 14125 | "#define signature_NSC_edx 0x43534e20\n" |
| 14126 | "#define signature_NSC_ecx 0x79622065\n" |
| 14127 | "/* NEXGEN: \"NexGenDriven\" */\n" |
| 14128 | "#define signature_NEXGEN_ebx 0x4778654e\n" |
| 14129 | "#define signature_NEXGEN_edx 0x72446e65\n" |
| 14130 | "#define signature_NEXGEN_ecx 0x6e657669\n" |
| 14131 | "/* RISE: \"RiseRiseRise\" */\n" |
| 14132 | "#define signature_RISE_ebx 0x65736952\n" |
| 14133 | "#define signature_RISE_edx 0x65736952\n" |
| 14134 | "#define signature_RISE_ecx 0x65736952\n" |
| 14135 | "/* SIS: \"SiS SiS SiS \" */\n" |
| 14136 | "#define signature_SIS_ebx 0x20536953\n" |
| 14137 | "#define signature_SIS_edx 0x20536953\n" |
| 14138 | "#define signature_SIS_ecx 0x20536953\n" |
| 14139 | "/* UMC: \"UMC UMC UMC \" */\n" |
| 14140 | "#define signature_UMC_ebx 0x20434d55\n" |
| 14141 | "#define signature_UMC_edx 0x20434d55\n" |
| 14142 | "#define signature_UMC_ecx 0x20434d55\n" |
| 14143 | "/* VIA: \"VIA VIA VIA \" */\n" |
| 14144 | "#define signature_VIA_ebx 0x20414956\n" |
| 14145 | "#define signature_VIA_edx 0x20414956\n" |
| 14146 | "#define signature_VIA_ecx 0x20414956\n" |
| 14147 | "/* VORTEX: \"Vortex86 SoC\" */\n" |
| 14148 | "#define signature_VORTEX_ebx 0x74726f56\n" |
| 14149 | "#define signature_VORTEX_edx 0x36387865\n" |
| 14150 | "#define signature_VORTEX_ecx 0x436f5320\n" |
| 14151 | "\n" |
| 14152 | "/* Features in %ecx for leaf 1 */\n" |
| 14153 | "#define bit_SSE3 0x00000001\n" |
| 14154 | "#define bit_PCLMULQDQ 0x00000002\n" |
| 14155 | "#define bit_PCLMUL bit_PCLMULQDQ /* for gcc compat */\n" |
| 14156 | "#define bit_DTES64 0x00000004\n" |
| 14157 | "#define bit_MONITOR 0x00000008\n" |
| 14158 | "#define bit_DSCPL 0x00000010\n" |
| 14159 | "#define bit_VMX 0x00000020\n" |
| 14160 | "#define bit_SMX 0x00000040\n" |
| 14161 | "#define bit_EIST 0x00000080\n" |
| 14162 | "#define bit_TM2 0x00000100\n" |
| 14163 | "#define bit_SSSE3 0x00000200\n" |
| 14164 | "#define bit_CNXTID 0x00000400\n" |
| 14165 | "#define bit_FMA 0x00001000\n" |
| 14166 | "#define bit_CMPXCHG16B 0x00002000\n" |
| 14167 | "#define bit_xTPR 0x00004000\n" |
| 14168 | "#define bit_PDCM 0x00008000\n" |
| 14169 | "#define bit_PCID 0x00020000\n" |
| 14170 | "#define bit_DCA 0x00040000\n" |
| 14171 | "#define bit_SSE41 0x00080000\n" |
| 14172 | "#define bit_SSE4_1 bit_SSE41 /* for gcc compat */\n" |
| 14173 | "#define bit_SSE42 0x00100000\n" |
| 14174 | "#define bit_SSE4_2 bit_SSE42 /* for gcc compat */\n" |
| 14175 | "#define bit_x2APIC 0x00200000\n" |
| 14176 | "#define bit_MOVBE 0x00400000\n" |
| 14177 | "#define bit_POPCNT 0x00800000\n" |
| 14178 | "#define bit_TSCDeadline 0x01000000\n" |
| 14179 | "#define bit_AESNI 0x02000000\n" |
| 14180 | "#define bit_AES bit_AESNI /* for gcc compat */\n" |
| 14181 | "#define bit_XSAVE 0x04000000\n" |
| 14182 | "#define bit_OSXSAVE 0x08000000\n" |
| 14183 | "#define bit_AVX 0x10000000\n" |
| 14184 | "#define bit_F16C 0x20000000\n" |
| 14185 | "#define bit_RDRND 0x40000000\n" |
| 14186 | "\n" |
| 14187 | "/* Features in %edx for leaf 1 */\n" |
| 14188 | "#define bit_FPU 0x00000001\n" |
| 14189 | "#define bit_VME 0x00000002\n" |
| 14190 | "#define bit_DE 0x00000004\n" |
| 14191 | "#define bit_PSE 0x00000008\n" |
| 14192 | "#define bit_TSC 0x00000010\n" |
| 14193 | "#define bit_MSR 0x00000020\n" |
| 14194 | "#define bit_PAE 0x00000040\n" |
| 14195 | "#define bit_MCE 0x00000080\n" |
| 14196 | "#define bit_CX8 0x00000100\n" |
| 14197 | "#define bit_CMPXCHG8B bit_CX8 /* for gcc compat */\n" |
| 14198 | "#define bit_APIC 0x00000200\n" |
| 14199 | "#define bit_SEP 0x00000800\n" |
| 14200 | "#define bit_MTRR 0x00001000\n" |
| 14201 | "#define bit_PGE 0x00002000\n" |
| 14202 | "#define bit_MCA 0x00004000\n" |
| 14203 | "#define bit_CMOV 0x00008000\n" |
| 14204 | "#define bit_PAT 0x00010000\n" |
| 14205 | "#define bit_PSE36 0x00020000\n" |
| 14206 | "#define bit_PSN 0x00040000\n" |
| 14207 | "#define bit_CLFSH 0x00080000\n" |
| 14208 | "#define bit_DS 0x00200000\n" |
| 14209 | "#define bit_ACPI 0x00400000\n" |
| 14210 | "#define bit_MMX 0x00800000\n" |
| 14211 | "#define bit_FXSR 0x01000000\n" |
| 14212 | "#define bit_FXSAVE bit_FXSR /* for gcc compat */\n" |
| 14213 | "#define bit_SSE 0x02000000\n" |
| 14214 | "#define bit_SSE2 0x04000000\n" |
| 14215 | "#define bit_SS 0x08000000\n" |
| 14216 | "#define bit_HTT 0x10000000\n" |
| 14217 | "#define bit_TM 0x20000000\n" |
| 14218 | "#define bit_PBE 0x80000000\n" |
| 14219 | "\n" |
| 14220 | "/* Features in %ebx for leaf 7 sub-leaf 0 */\n" |
| 14221 | "#define bit_FSGSBASE 0x00000001\n" |
| 14222 | "#define bit_SGX 0x00000004\n" |
| 14223 | "#define bit_BMI 0x00000008\n" |
| 14224 | "#define bit_HLE 0x00000010\n" |
| 14225 | "#define bit_AVX2 0x00000020\n" |
| 14226 | "#define bit_SMEP 0x00000080\n" |
| 14227 | "#define bit_BMI2 0x00000100\n" |
| 14228 | "#define bit_ENH_MOVSB 0x00000200\n" |
| 14229 | "#define bit_INVPCID 0x00000400\n" |
| 14230 | "#define bit_RTM 0x00000800\n" |
| 14231 | "#define bit_MPX 0x00004000\n" |
| 14232 | "#define bit_AVX512F 0x00010000\n" |
| 14233 | "#define bit_AVX512DQ 0x00020000\n" |
| 14234 | "#define bit_RDSEED 0x00040000\n" |
| 14235 | "#define bit_ADX 0x00080000\n" |
| 14236 | "#define bit_AVX512IFMA 0x00200000\n" |
| 14237 | "#define bit_CLFLUSHOPT 0x00800000\n" |
| 14238 | "#define bit_CLWB 0x01000000\n" |
| 14239 | "#define bit_AVX512PF 0x04000000\n" |
| 14240 | "#define bit_AVX512ER 0x08000000\n" |
| 14241 | "#define bit_AVX512CD 0x10000000\n" |
| 14242 | "#define bit_SHA 0x20000000\n" |
| 14243 | "#define bit_AVX512BW 0x40000000\n" |
| 14244 | "#define bit_AVX512VL 0x80000000\n" |
| 14245 | "\n" |
| 14246 | "/* Features in %ecx for leaf 7 sub-leaf 0 */\n" |
| 14247 | "#define bit_PREFTCHWT1 0x00000001\n" |
| 14248 | "#define bit_AVX512VBMI 0x00000002\n" |
| 14249 | "#define bit_PKU 0x00000004\n" |
| 14250 | "#define bit_OSPKE 0x00000010\n" |
| 14251 | "#define bit_WAITPKG 0x00000020\n" |
| 14252 | "#define bit_AVX512VBMI2 0x00000040\n" |
| 14253 | "#define bit_SHSTK 0x00000080\n" |
| 14254 | "#define bit_GFNI 0x00000100\n" |
| 14255 | "#define bit_VAES 0x00000200\n" |
| 14256 | "#define bit_VPCLMULQDQ 0x00000400\n" |
| 14257 | "#define bit_AVX512VNNI 0x00000800\n" |
| 14258 | "#define bit_AVX512BITALG 0x00001000\n" |
| 14259 | "#define bit_AVX512VPOPCNTDQ 0x00004000\n" |
| 14260 | "#define bit_RDPID 0x00400000\n" |
| 14261 | "#define bit_CLDEMOTE 0x02000000\n" |
| 14262 | "#define bit_MOVDIRI 0x08000000\n" |
| 14263 | "#define bit_MOVDIR64B 0x10000000\n" |
| 14264 | "\n" |
| 14265 | "/* Features in %edx for leaf 7 sub-leaf 0 */\n" |
| 14266 | "#define bit_AVX5124VNNIW 0x00000004\n" |
| 14267 | "#define bit_AVX5124FMAPS 0x00000008\n" |
| 14268 | "#define bit_PCONFIG 0x00040000\n" |
| 14269 | "#define bit_IBT 0x00100000\n" |
| 14270 | "\n" |
| 14271 | "/* Features in %eax for leaf 13 sub-leaf 1 */\n" |
| 14272 | "#define bit_XSAVEOPT 0x00000001\n" |
| 14273 | "#define bit_XSAVEC 0x00000002\n" |
| 14274 | "#define bit_XSAVES 0x00000008\n" |
| 14275 | "\n" |
| 14276 | "/* Features in %eax for leaf 0x14 sub-leaf 0 */\n" |
| 14277 | "#define bit_PTWRITE 0x00000010\n" |
| 14278 | "\n" |
| 14279 | "/* Features in %ecx for leaf 0x80000001 */\n" |
| 14280 | "#define bit_LAHF_LM 0x00000001\n" |
| 14281 | "#define bit_ABM 0x00000020\n" |
| 14282 | "#define bit_LZCNT bit_ABM /* for gcc compat */\n" |
| 14283 | "#define bit_SSE4a 0x00000040\n" |
| 14284 | "#define bit_PRFCHW 0x00000100\n" |
| 14285 | "#define bit_XOP 0x00000800\n" |
| 14286 | "#define bit_LWP 0x00008000\n" |
| 14287 | "#define bit_FMA4 0x00010000\n" |
| 14288 | "#define bit_TBM 0x00200000\n" |
| 14289 | "#define bit_MWAITX 0x20000000\n" |
| 14290 | "\n" |
| 14291 | "/* Features in %edx for leaf 0x80000001 */\n" |
| 14292 | "#define bit_MMXEXT 0x00400000\n" |
| 14293 | "#define bit_LM 0x20000000\n" |
| 14294 | "#define bit_3DNOWP 0x40000000\n" |
| 14295 | "#define bit_3DNOW 0x80000000\n" |
| 14296 | "\n" |
| 14297 | "/* Features in %ebx for leaf 0x80000008 */\n" |
| 14298 | "#define bit_CLZERO 0x00000001\n" |
| 14299 | "#define bit_WBNOINVD 0x00000200\n" |
| 14300 | "\n" |
| 14301 | "\n" |
| 14302 | "#if __i386__\n" |
| 14303 | "#define __cpuid(__leaf, __eax, __ebx, __ecx, __edx) \\\n" |
| 14304 | " __asm(\"cpuid\" : \"=a\"(__eax), \"=b\" (__ebx), \"=c\"(__ecx), \"=d\"(__edx) \\\n" |
| 14305 | " : \"0\"(__leaf))\n" |
| 14306 | "\n" |
| 14307 | "#define __cpuid_count(__leaf, __count, __eax, __ebx, __ecx, __edx) \\\n" |
| 14308 | " __asm(\"cpuid\" : \"=a\"(__eax), \"=b\" (__ebx), \"=c\"(__ecx), \"=d\"(__edx) \\\n" |
| 14309 | " : \"0\"(__leaf), \"2\"(__count))\n" |
| 14310 | "#else\n" |
| 14311 | "/* x86-64 uses %rbx as the base register, so preserve it. */\n" |
| 14312 | "#define __cpuid(__leaf, __eax, __ebx, __ecx, __edx) \\\n" |
| 14313 | " __asm(\" xchgq %%rbx,%q1\\n\" \\\n" |
| 14314 | " \" cpuid\\n\" \\\n" |
| 14315 | " \" xchgq %%rbx,%q1\" \\\n" |
| 14316 | " : \"=a\"(__eax), \"=r\" (__ebx), \"=c\"(__ecx), \"=d\"(__edx) \\\n" |
| 14317 | " : \"0\"(__leaf))\n" |
| 14318 | "\n" |
| 14319 | "#define __cpuid_count(__leaf, __count, __eax, __ebx, __ecx, __edx) \\\n" |
| 14320 | " __asm(\" xchgq %%rbx,%q1\\n\" \\\n" |
| 14321 | " \" cpuid\\n\" \\\n" |
| 14322 | " \" xchgq %%rbx,%q1\" \\\n" |
| 14323 | " : \"=a\"(__eax), \"=r\" (__ebx), \"=c\"(__ecx), \"=d\"(__edx) \\\n" |
| 14324 | " : \"0\"(__leaf), \"2\"(__count))\n" |
| 14325 | "#endif\n" |
| 14326 | "\n" |
| 14327 | "static __inline int __get_cpuid_max (unsigned int __leaf, unsigned int *__sig)\n" |
| 14328 | "{\n" |
| 14329 | " unsigned int __eax, __ebx, __ecx, __edx;\n" |
| 14330 | "#if __i386__\n" |
| 14331 | " int __cpuid_supported;\n" |
| 14332 | "\n" |
| 14333 | " __asm(\" pushfl\\n\"\n" |
| 14334 | " \" popl %%eax\\n\"\n" |
| 14335 | " \" movl %%eax,%%ecx\\n\"\n" |
| 14336 | " \" xorl $0x00200000,%%eax\\n\"\n" |
| 14337 | " \" pushl %%eax\\n\"\n" |
| 14338 | " \" popfl\\n\"\n" |
| 14339 | " \" pushfl\\n\"\n" |
| 14340 | " \" popl %%eax\\n\"\n" |
| 14341 | " \" movl $0,%0\\n\"\n" |
| 14342 | " \" cmpl %%eax,%%ecx\\n\"\n" |
| 14343 | " \" je 1f\\n\"\n" |
| 14344 | " \" movl $1,%0\\n\"\n" |
| 14345 | " \"1:\"\n" |
| 14346 | " : \"=r\" (__cpuid_supported) : : \"eax\", \"ecx\");\n" |
| 14347 | " if (!__cpuid_supported)\n" |
| 14348 | " return 0;\n" |
| 14349 | "#endif\n" |
| 14350 | "\n" |
| 14351 | " __cpuid(__leaf, __eax, __ebx, __ecx, __edx);\n" |
| 14352 | " if (__sig)\n" |
| 14353 | " *__sig = __ebx;\n" |
| 14354 | " return __eax;\n" |
| 14355 | "}\n" |
| 14356 | "\n" |
| 14357 | "static __inline int __get_cpuid (unsigned int __leaf, unsigned int *__eax,\n" |
| 14358 | " unsigned int *__ebx, unsigned int *__ecx,\n" |
| 14359 | " unsigned int *__edx)\n" |
| 14360 | "{\n" |
| 14361 | " unsigned int __max_leaf = __get_cpuid_max(__leaf & 0x80000000, 0);\n" |
| 14362 | "\n" |
| 14363 | " if (__max_leaf == 0 || __max_leaf < __leaf)\n" |
| 14364 | " return 0;\n" |
| 14365 | "\n" |
| 14366 | " __cpuid(__leaf, *__eax, *__ebx, *__ecx, *__edx);\n" |
| 14367 | " return 1;\n" |
| 14368 | "}\n" |
| 14369 | "\n" |
| 14370 | "static __inline int __get_cpuid_count (unsigned int __leaf,\n" |
| 14371 | " unsigned int __subleaf,\n" |
| 14372 | " unsigned int *__eax, unsigned int *__ebx,\n" |
| 14373 | " unsigned int *__ecx, unsigned int *__edx)\n" |
| 14374 | "{\n" |
| 14375 | " unsigned int __max_leaf = __get_cpuid_max(__leaf & 0x80000000, 0);\n" |
| 14376 | "\n" |
| 14377 | " if (__max_leaf == 0 || __max_leaf < __leaf)\n" |
| 14378 | " return 0;\n" |
| 14379 | "\n" |
| 14380 | " __cpuid_count(__leaf, __subleaf, *__eax, *__ebx, *__ecx, *__edx);\n" |
| 14381 | " return 1;\n" |
| 14382 | "}\n" |
| 14383 | "" } , |
| 14384 | { "/builtins/emmintrin.h" , "/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===\n" |
| 14385 | " *\n" |
| 14386 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 14387 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 14388 | " * in the Software without restriction, including without limitation the rights\n" |
| 14389 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 14390 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 14391 | " * furnished to do so, subject to the following conditions:\n" |
| 14392 | " *\n" |
| 14393 | " * The above copyright notice and this permission notice shall be included in\n" |
| 14394 | " * all copies or substantial portions of the Software.\n" |
| 14395 | " *\n" |
| 14396 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 14397 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 14398 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 14399 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 14400 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 14401 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 14402 | " * THE SOFTWARE.\n" |
| 14403 | " *\n" |
| 14404 | " *===-----------------------------------------------------------------------===\n" |
| 14405 | " */\n" |
| 14406 | "\n" |
| 14407 | "#ifndef __EMMINTRIN_H\n" |
| 14408 | "#define __EMMINTRIN_H\n" |
| 14409 | "\n" |
| 14410 | "#include <xmmintrin.h>\n" |
| 14411 | "\n" |
| 14412 | "typedef double __m128d __attribute__((__vector_size__(16)));\n" |
| 14413 | "typedef long long __m128i __attribute__((__vector_size__(16)));\n" |
| 14414 | "\n" |
| 14415 | "/* Type defines. */\n" |
| 14416 | "typedef double __v2df __attribute__ ((__vector_size__ (16)));\n" |
| 14417 | "typedef long long __v2di __attribute__ ((__vector_size__ (16)));\n" |
| 14418 | "typedef short __v8hi __attribute__((__vector_size__(16)));\n" |
| 14419 | "typedef char __v16qi __attribute__((__vector_size__(16)));\n" |
| 14420 | "\n" |
| 14421 | "/* Unsigned types */\n" |
| 14422 | "typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16)));\n" |
| 14423 | "typedef unsigned short __v8hu __attribute__((__vector_size__(16)));\n" |
| 14424 | "typedef unsigned char __v16qu __attribute__((__vector_size__(16)));\n" |
| 14425 | "\n" |
| 14426 | "/* We need an explicitly signed variant for char. Note that this shouldn't\n" |
| 14427 | " * appear in the interface though. */\n" |
| 14428 | "typedef signed char __v16qs __attribute__((__vector_size__(16)));\n" |
| 14429 | "\n" |
| 14430 | "/* Define the default attributes for the functions in this file. */\n" |
| 14431 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"sse2\"), __min_vector_width__(128)))\n" |
| 14432 | "#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__(\"mmx,sse2\"), __min_vector_width__(64)))\n" |
| 14433 | "\n" |
| 14434 | "/// Adds lower double-precision values in both operands and returns the\n" |
| 14435 | "/// sum in the lower 64 bits of the result. The upper 64 bits of the result\n" |
| 14436 | "/// are copied from the upper double-precision value of the first operand.\n" |
| 14437 | "///\n" |
| 14438 | "/// \\headerfile <x86intrin.h>\n" |
| 14439 | "///\n" |
| 14440 | "/// This intrinsic corresponds to the <c> VADDSD / ADDSD </c> instruction.\n" |
| 14441 | "///\n" |
| 14442 | "/// \\param __a\n" |
| 14443 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
| 14444 | "/// \\param __b\n" |
| 14445 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
| 14446 | "/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n" |
| 14447 | "/// sum of the lower 64 bits of both operands. The upper 64 bits are copied\n" |
| 14448 | "/// from the upper 64 bits of the first source operand.\n" |
| 14449 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 14450 | "_mm_add_sd(__m128d __a, __m128d __b)\n" |
| 14451 | "{\n" |
| 14452 | " __a[0] += __b[0];\n" |
| 14453 | " return __a;\n" |
| 14454 | "}\n" |
| 14455 | "\n" |
| 14456 | "/// Adds two 128-bit vectors of [2 x double].\n" |
| 14457 | "///\n" |
| 14458 | "/// \\headerfile <x86intrin.h>\n" |
| 14459 | "///\n" |
| 14460 | "/// This intrinsic corresponds to the <c> VADDPD / ADDPD </c> instruction.\n" |
| 14461 | "///\n" |
| 14462 | "/// \\param __a\n" |
| 14463 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
| 14464 | "/// \\param __b\n" |
| 14465 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
| 14466 | "/// \\returns A 128-bit vector of [2 x double] containing the sums of both\n" |
| 14467 | "/// operands.\n" |
| 14468 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 14469 | "_mm_add_pd(__m128d __a, __m128d __b)\n" |
| 14470 | "{\n" |
| 14471 | " return (__m128d)((__v2df)__a + (__v2df)__b);\n" |
| 14472 | "}\n" |
| 14473 | "\n" |
| 14474 | "/// Subtracts the lower double-precision value of the second operand\n" |
| 14475 | "/// from the lower double-precision value of the first operand and returns\n" |
| 14476 | "/// the difference in the lower 64 bits of the result. The upper 64 bits of\n" |
| 14477 | "/// the result are copied from the upper double-precision value of the first\n" |
| 14478 | "/// operand.\n" |
| 14479 | "///\n" |
| 14480 | "/// \\headerfile <x86intrin.h>\n" |
| 14481 | "///\n" |
| 14482 | "/// This intrinsic corresponds to the <c> VSUBSD / SUBSD </c> instruction.\n" |
| 14483 | "///\n" |
| 14484 | "/// \\param __a\n" |
| 14485 | "/// A 128-bit vector of [2 x double] containing the minuend.\n" |
| 14486 | "/// \\param __b\n" |
| 14487 | "/// A 128-bit vector of [2 x double] containing the subtrahend.\n" |
| 14488 | "/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n" |
| 14489 | "/// difference of the lower 64 bits of both operands. The upper 64 bits are\n" |
| 14490 | "/// copied from the upper 64 bits of the first source operand.\n" |
| 14491 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 14492 | "_mm_sub_sd(__m128d __a, __m128d __b)\n" |
| 14493 | "{\n" |
| 14494 | " __a[0] -= __b[0];\n" |
| 14495 | " return __a;\n" |
| 14496 | "}\n" |
| 14497 | "\n" |
| 14498 | "/// Subtracts two 128-bit vectors of [2 x double].\n" |
| 14499 | "///\n" |
| 14500 | "/// \\headerfile <x86intrin.h>\n" |
| 14501 | "///\n" |
| 14502 | "/// This intrinsic corresponds to the <c> VSUBPD / SUBPD </c> instruction.\n" |
| 14503 | "///\n" |
| 14504 | "/// \\param __a\n" |
| 14505 | "/// A 128-bit vector of [2 x double] containing the minuend.\n" |
| 14506 | "/// \\param __b\n" |
| 14507 | "/// A 128-bit vector of [2 x double] containing the subtrahend.\n" |
| 14508 | "/// \\returns A 128-bit vector of [2 x double] containing the differences between\n" |
| 14509 | "/// both operands.\n" |
| 14510 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 14511 | "_mm_sub_pd(__m128d __a, __m128d __b)\n" |
| 14512 | "{\n" |
| 14513 | " return (__m128d)((__v2df)__a - (__v2df)__b);\n" |
| 14514 | "}\n" |
| 14515 | "\n" |
| 14516 | "/// Multiplies lower double-precision values in both operands and returns\n" |
| 14517 | "/// the product in the lower 64 bits of the result. The upper 64 bits of the\n" |
| 14518 | "/// result are copied from the upper double-precision value of the first\n" |
| 14519 | "/// operand.\n" |
| 14520 | "///\n" |
| 14521 | "/// \\headerfile <x86intrin.h>\n" |
| 14522 | "///\n" |
| 14523 | "/// This intrinsic corresponds to the <c> VMULSD / MULSD </c> instruction.\n" |
| 14524 | "///\n" |
| 14525 | "/// \\param __a\n" |
| 14526 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
| 14527 | "/// \\param __b\n" |
| 14528 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
| 14529 | "/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n" |
| 14530 | "/// product of the lower 64 bits of both operands. The upper 64 bits are\n" |
| 14531 | "/// copied from the upper 64 bits of the first source operand.\n" |
| 14532 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 14533 | "_mm_mul_sd(__m128d __a, __m128d __b)\n" |
| 14534 | "{\n" |
| 14535 | " __a[0] *= __b[0];\n" |
| 14536 | " return __a;\n" |
| 14537 | "}\n" |
| 14538 | "\n" |
| 14539 | "/// Multiplies two 128-bit vectors of [2 x double].\n" |
| 14540 | "///\n" |
| 14541 | "/// \\headerfile <x86intrin.h>\n" |
| 14542 | "///\n" |
| 14543 | "/// This intrinsic corresponds to the <c> VMULPD / MULPD </c> instruction.\n" |
| 14544 | "///\n" |
| 14545 | "/// \\param __a\n" |
| 14546 | "/// A 128-bit vector of [2 x double] containing one of the operands.\n" |
| 14547 | "/// \\param __b\n" |
| 14548 | "/// A 128-bit vector of [2 x double] containing one of the operands.\n" |
| 14549 | "/// \\returns A 128-bit vector of [2 x double] containing the products of both\n" |
| 14550 | "/// operands.\n" |
| 14551 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 14552 | "_mm_mul_pd(__m128d __a, __m128d __b)\n" |
| 14553 | "{\n" |
| 14554 | " return (__m128d)((__v2df)__a * (__v2df)__b);\n" |
| 14555 | "}\n" |
| 14556 | "\n" |
| 14557 | "/// Divides the lower double-precision value of the first operand by the\n" |
| 14558 | "/// lower double-precision value of the second operand and returns the\n" |
| 14559 | "/// quotient in the lower 64 bits of the result. The upper 64 bits of the\n" |
| 14560 | "/// result are copied from the upper double-precision value of the first\n" |
| 14561 | "/// operand.\n" |
| 14562 | "///\n" |
| 14563 | "/// \\headerfile <x86intrin.h>\n" |
| 14564 | "///\n" |
| 14565 | "/// This intrinsic corresponds to the <c> VDIVSD / DIVSD </c> instruction.\n" |
| 14566 | "///\n" |
| 14567 | "/// \\param __a\n" |
| 14568 | "/// A 128-bit vector of [2 x double] containing the dividend.\n" |
| 14569 | "/// \\param __b\n" |
| 14570 | "/// A 128-bit vector of [2 x double] containing divisor.\n" |
| 14571 | "/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n" |
| 14572 | "/// quotient of the lower 64 bits of both operands. The upper 64 bits are\n" |
| 14573 | "/// copied from the upper 64 bits of the first source operand.\n" |
| 14574 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 14575 | "_mm_div_sd(__m128d __a, __m128d __b)\n" |
| 14576 | "{\n" |
| 14577 | " __a[0] /= __b[0];\n" |
| 14578 | " return __a;\n" |
| 14579 | "}\n" |
| 14580 | "\n" |
| 14581 | "/// Performs an element-by-element division of two 128-bit vectors of\n" |
| 14582 | "/// [2 x double].\n" |
| 14583 | "///\n" |
| 14584 | "/// \\headerfile <x86intrin.h>\n" |
| 14585 | "///\n" |
| 14586 | "/// This intrinsic corresponds to the <c> VDIVPD / DIVPD </c> instruction.\n" |
| 14587 | "///\n" |
| 14588 | "/// \\param __a\n" |
| 14589 | "/// A 128-bit vector of [2 x double] containing the dividend.\n" |
| 14590 | "/// \\param __b\n" |
| 14591 | "/// A 128-bit vector of [2 x double] containing the divisor.\n" |
| 14592 | "/// \\returns A 128-bit vector of [2 x double] containing the quotients of both\n" |
| 14593 | "/// operands.\n" |
| 14594 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 14595 | "_mm_div_pd(__m128d __a, __m128d __b)\n" |
| 14596 | "{\n" |
| 14597 | " return (__m128d)((__v2df)__a / (__v2df)__b);\n" |
| 14598 | "}\n" |
| 14599 | "\n" |
| 14600 | "/// Calculates the square root of the lower double-precision value of\n" |
| 14601 | "/// the second operand and returns it in the lower 64 bits of the result.\n" |
| 14602 | "/// The upper 64 bits of the result are copied from the upper\n" |
| 14603 | "/// double-precision value of the first operand.\n" |
| 14604 | "///\n" |
| 14605 | "/// \\headerfile <x86intrin.h>\n" |
| 14606 | "///\n" |
| 14607 | "/// This intrinsic corresponds to the <c> VSQRTSD / SQRTSD </c> instruction.\n" |
| 14608 | "///\n" |
| 14609 | "/// \\param __a\n" |
| 14610 | "/// A 128-bit vector of [2 x double] containing one of the operands. The\n" |
| 14611 | "/// upper 64 bits of this operand are copied to the upper 64 bits of the\n" |
| 14612 | "/// result.\n" |
| 14613 | "/// \\param __b\n" |
| 14614 | "/// A 128-bit vector of [2 x double] containing one of the operands. The\n" |
| 14615 | "/// square root is calculated using the lower 64 bits of this operand.\n" |
| 14616 | "/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n" |
| 14617 | "/// square root of the lower 64 bits of operand \\a __b, and whose upper 64\n" |
| 14618 | "/// bits are copied from the upper 64 bits of operand \\a __a.\n" |
| 14619 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 14620 | "_mm_sqrt_sd(__m128d __a, __m128d __b)\n" |
| 14621 | "{\n" |
| 14622 | " __m128d __c = __builtin_ia32_sqrtsd((__v2df)__b);\n" |
| 14623 | " return __extension__ (__m128d) { __c[0], __a[1] };\n" |
| 14624 | "}\n" |
| 14625 | "\n" |
| 14626 | "/// Calculates the square root of the each of two values stored in a\n" |
| 14627 | "/// 128-bit vector of [2 x double].\n" |
| 14628 | "///\n" |
| 14629 | "/// \\headerfile <x86intrin.h>\n" |
| 14630 | "///\n" |
| 14631 | "/// This intrinsic corresponds to the <c> VSQRTPD / SQRTPD </c> instruction.\n" |
| 14632 | "///\n" |
| 14633 | "/// \\param __a\n" |
| 14634 | "/// A 128-bit vector of [2 x double].\n" |
| 14635 | "/// \\returns A 128-bit vector of [2 x double] containing the square roots of the\n" |
| 14636 | "/// values in the operand.\n" |
| 14637 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 14638 | "_mm_sqrt_pd(__m128d __a)\n" |
| 14639 | "{\n" |
| 14640 | " return __builtin_ia32_sqrtpd((__v2df)__a);\n" |
| 14641 | "}\n" |
| 14642 | "\n" |
| 14643 | "/// Compares lower 64-bit double-precision values of both operands, and\n" |
| 14644 | "/// returns the lesser of the pair of values in the lower 64-bits of the\n" |
| 14645 | "/// result. The upper 64 bits of the result are copied from the upper\n" |
| 14646 | "/// double-precision value of the first operand.\n" |
| 14647 | "///\n" |
| 14648 | "/// \\headerfile <x86intrin.h>\n" |
| 14649 | "///\n" |
| 14650 | "/// This intrinsic corresponds to the <c> VMINSD / MINSD </c> instruction.\n" |
| 14651 | "///\n" |
| 14652 | "/// \\param __a\n" |
| 14653 | "/// A 128-bit vector of [2 x double] containing one of the operands. The\n" |
| 14654 | "/// lower 64 bits of this operand are used in the comparison.\n" |
| 14655 | "/// \\param __b\n" |
| 14656 | "/// A 128-bit vector of [2 x double] containing one of the operands. The\n" |
| 14657 | "/// lower 64 bits of this operand are used in the comparison.\n" |
| 14658 | "/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n" |
| 14659 | "/// minimum value between both operands. The upper 64 bits are copied from\n" |
| 14660 | "/// the upper 64 bits of the first source operand.\n" |
| 14661 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 14662 | "_mm_min_sd(__m128d __a, __m128d __b)\n" |
| 14663 | "{\n" |
| 14664 | " return __builtin_ia32_minsd((__v2df)__a, (__v2df)__b);\n" |
| 14665 | "}\n" |
| 14666 | "\n" |
| 14667 | "/// Performs element-by-element comparison of the two 128-bit vectors of\n" |
| 14668 | "/// [2 x double] and returns the vector containing the lesser of each pair of\n" |
| 14669 | "/// values.\n" |
| 14670 | "///\n" |
| 14671 | "/// \\headerfile <x86intrin.h>\n" |
| 14672 | "///\n" |
| 14673 | "/// This intrinsic corresponds to the <c> VMINPD / MINPD </c> instruction.\n" |
| 14674 | "///\n" |
| 14675 | "/// \\param __a\n" |
| 14676 | "/// A 128-bit vector of [2 x double] containing one of the operands.\n" |
| 14677 | "/// \\param __b\n" |
| 14678 | "/// A 128-bit vector of [2 x double] containing one of the operands.\n" |
| 14679 | "/// \\returns A 128-bit vector of [2 x double] containing the minimum values\n" |
| 14680 | "/// between both operands.\n" |
| 14681 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 14682 | "_mm_min_pd(__m128d __a, __m128d __b)\n" |
| 14683 | "{\n" |
| 14684 | " return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b);\n" |
| 14685 | "}\n" |
| 14686 | "\n" |
| 14687 | "/// Compares lower 64-bit double-precision values of both operands, and\n" |
| 14688 | "/// returns the greater of the pair of values in the lower 64-bits of the\n" |
| 14689 | "/// result. The upper 64 bits of the result are copied from the upper\n" |
| 14690 | "/// double-precision value of the first operand.\n" |
| 14691 | "///\n" |
| 14692 | "/// \\headerfile <x86intrin.h>\n" |
| 14693 | "///\n" |
| 14694 | "/// This intrinsic corresponds to the <c> VMAXSD / MAXSD </c> instruction.\n" |
| 14695 | "///\n" |
| 14696 | "/// \\param __a\n" |
| 14697 | "/// A 128-bit vector of [2 x double] containing one of the operands. The\n" |
| 14698 | "/// lower 64 bits of this operand are used in the comparison.\n" |
| 14699 | "/// \\param __b\n" |
| 14700 | "/// A 128-bit vector of [2 x double] containing one of the operands. The\n" |
| 14701 | "/// lower 64 bits of this operand are used in the comparison.\n" |
| 14702 | "/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n" |
| 14703 | "/// maximum value between both operands. The upper 64 bits are copied from\n" |
| 14704 | "/// the upper 64 bits of the first source operand.\n" |
| 14705 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 14706 | "_mm_max_sd(__m128d __a, __m128d __b)\n" |
| 14707 | "{\n" |
| 14708 | " return __builtin_ia32_maxsd((__v2df)__a, (__v2df)__b);\n" |
| 14709 | "}\n" |
| 14710 | "\n" |
| 14711 | "/// Performs element-by-element comparison of the two 128-bit vectors of\n" |
| 14712 | "/// [2 x double] and returns the vector containing the greater of each pair\n" |
| 14713 | "/// of values.\n" |
| 14714 | "///\n" |
| 14715 | "/// \\headerfile <x86intrin.h>\n" |
| 14716 | "///\n" |
| 14717 | "/// This intrinsic corresponds to the <c> VMAXPD / MAXPD </c> instruction.\n" |
| 14718 | "///\n" |
| 14719 | "/// \\param __a\n" |
| 14720 | "/// A 128-bit vector of [2 x double] containing one of the operands.\n" |
| 14721 | "/// \\param __b\n" |
| 14722 | "/// A 128-bit vector of [2 x double] containing one of the operands.\n" |
| 14723 | "/// \\returns A 128-bit vector of [2 x double] containing the maximum values\n" |
| 14724 | "/// between both operands.\n" |
| 14725 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 14726 | "_mm_max_pd(__m128d __a, __m128d __b)\n" |
| 14727 | "{\n" |
| 14728 | " return __builtin_ia32_maxpd((__v2df)__a, (__v2df)__b);\n" |
| 14729 | "}\n" |
| 14730 | "\n" |
| 14731 | "/// Performs a bitwise AND of two 128-bit vectors of [2 x double].\n" |
| 14732 | "///\n" |
| 14733 | "/// \\headerfile <x86intrin.h>\n" |
| 14734 | "///\n" |
| 14735 | "/// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction.\n" |
| 14736 | "///\n" |
| 14737 | "/// \\param __a\n" |
| 14738 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
| 14739 | "/// \\param __b\n" |
| 14740 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
| 14741 | "/// \\returns A 128-bit vector of [2 x double] containing the bitwise AND of the\n" |
| 14742 | "/// values between both operands.\n" |
| 14743 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 14744 | "_mm_and_pd(__m128d __a, __m128d __b)\n" |
| 14745 | "{\n" |
| 14746 | " return (__m128d)((__v2du)__a & (__v2du)__b);\n" |
| 14747 | "}\n" |
| 14748 | "\n" |
| 14749 | "/// Performs a bitwise AND of two 128-bit vectors of [2 x double], using\n" |
| 14750 | "/// the one's complement of the values contained in the first source operand.\n" |
| 14751 | "///\n" |
| 14752 | "/// \\headerfile <x86intrin.h>\n" |
| 14753 | "///\n" |
| 14754 | "/// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction.\n" |
| 14755 | "///\n" |
| 14756 | "/// \\param __a\n" |
| 14757 | "/// A 128-bit vector of [2 x double] containing the left source operand. The\n" |
| 14758 | "/// one's complement of this value is used in the bitwise AND.\n" |
| 14759 | "/// \\param __b\n" |
| 14760 | "/// A 128-bit vector of [2 x double] containing the right source operand.\n" |
| 14761 | "/// \\returns A 128-bit vector of [2 x double] containing the bitwise AND of the\n" |
| 14762 | "/// values in the second operand and the one's complement of the first\n" |
| 14763 | "/// operand.\n" |
| 14764 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 14765 | "_mm_andnot_pd(__m128d __a, __m128d __b)\n" |
| 14766 | "{\n" |
| 14767 | " return (__m128d)(~(__v2du)__a & (__v2du)__b);\n" |
| 14768 | "}\n" |
| 14769 | "\n" |
| 14770 | "/// Performs a bitwise OR of two 128-bit vectors of [2 x double].\n" |
| 14771 | "///\n" |
| 14772 | "/// \\headerfile <x86intrin.h>\n" |
| 14773 | "///\n" |
| 14774 | "/// This intrinsic corresponds to the <c> VPOR / POR </c> instruction.\n" |
| 14775 | "///\n" |
| 14776 | "/// \\param __a\n" |
| 14777 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
| 14778 | "/// \\param __b\n" |
| 14779 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
| 14780 | "/// \\returns A 128-bit vector of [2 x double] containing the bitwise OR of the\n" |
| 14781 | "/// values between both operands.\n" |
| 14782 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 14783 | "_mm_or_pd(__m128d __a, __m128d __b)\n" |
| 14784 | "{\n" |
| 14785 | " return (__m128d)((__v2du)__a | (__v2du)__b);\n" |
| 14786 | "}\n" |
| 14787 | "\n" |
| 14788 | "/// Performs a bitwise XOR of two 128-bit vectors of [2 x double].\n" |
| 14789 | "///\n" |
| 14790 | "/// \\headerfile <x86intrin.h>\n" |
| 14791 | "///\n" |
| 14792 | "/// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction.\n" |
| 14793 | "///\n" |
| 14794 | "/// \\param __a\n" |
| 14795 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
| 14796 | "/// \\param __b\n" |
| 14797 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
| 14798 | "/// \\returns A 128-bit vector of [2 x double] containing the bitwise XOR of the\n" |
| 14799 | "/// values between both operands.\n" |
| 14800 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 14801 | "_mm_xor_pd(__m128d __a, __m128d __b)\n" |
| 14802 | "{\n" |
| 14803 | " return (__m128d)((__v2du)__a ^ (__v2du)__b);\n" |
| 14804 | "}\n" |
| 14805 | "\n" |
| 14806 | "/// Compares each of the corresponding double-precision values of the\n" |
| 14807 | "/// 128-bit vectors of [2 x double] for equality. Each comparison yields 0x0\n" |
| 14808 | "/// for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
| 14809 | "///\n" |
| 14810 | "/// \\headerfile <x86intrin.h>\n" |
| 14811 | "///\n" |
| 14812 | "/// This intrinsic corresponds to the <c> VCMPEQPD / CMPEQPD </c> instruction.\n" |
| 14813 | "///\n" |
| 14814 | "/// \\param __a\n" |
| 14815 | "/// A 128-bit vector of [2 x double].\n" |
| 14816 | "/// \\param __b\n" |
| 14817 | "/// A 128-bit vector of [2 x double].\n" |
| 14818 | "/// \\returns A 128-bit vector containing the comparison results.\n" |
| 14819 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 14820 | "_mm_cmpeq_pd(__m128d __a, __m128d __b)\n" |
| 14821 | "{\n" |
| 14822 | " return (__m128d)__builtin_ia32_cmpeqpd((__v2df)__a, (__v2df)__b);\n" |
| 14823 | "}\n" |
| 14824 | "\n" |
| 14825 | "/// Compares each of the corresponding double-precision values of the\n" |
| 14826 | "/// 128-bit vectors of [2 x double] to determine if the values in the first\n" |
| 14827 | "/// operand are less than those in the second operand. Each comparison\n" |
| 14828 | "/// yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
| 14829 | "///\n" |
| 14830 | "/// \\headerfile <x86intrin.h>\n" |
| 14831 | "///\n" |
| 14832 | "/// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction.\n" |
| 14833 | "///\n" |
| 14834 | "/// \\param __a\n" |
| 14835 | "/// A 128-bit vector of [2 x double].\n" |
| 14836 | "/// \\param __b\n" |
| 14837 | "/// A 128-bit vector of [2 x double].\n" |
| 14838 | "/// \\returns A 128-bit vector containing the comparison results.\n" |
| 14839 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 14840 | "_mm_cmplt_pd(__m128d __a, __m128d __b)\n" |
| 14841 | "{\n" |
| 14842 | " return (__m128d)__builtin_ia32_cmpltpd((__v2df)__a, (__v2df)__b);\n" |
| 14843 | "}\n" |
| 14844 | "\n" |
| 14845 | "/// Compares each of the corresponding double-precision values of the\n" |
| 14846 | "/// 128-bit vectors of [2 x double] to determine if the values in the first\n" |
| 14847 | "/// operand are less than or equal to those in the second operand.\n" |
| 14848 | "///\n" |
| 14849 | "/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
| 14850 | "///\n" |
| 14851 | "/// \\headerfile <x86intrin.h>\n" |
| 14852 | "///\n" |
| 14853 | "/// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction.\n" |
| 14854 | "///\n" |
| 14855 | "/// \\param __a\n" |
| 14856 | "/// A 128-bit vector of [2 x double].\n" |
| 14857 | "/// \\param __b\n" |
| 14858 | "/// A 128-bit vector of [2 x double].\n" |
| 14859 | "/// \\returns A 128-bit vector containing the comparison results.\n" |
| 14860 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 14861 | "_mm_cmple_pd(__m128d __a, __m128d __b)\n" |
| 14862 | "{\n" |
| 14863 | " return (__m128d)__builtin_ia32_cmplepd((__v2df)__a, (__v2df)__b);\n" |
| 14864 | "}\n" |
| 14865 | "\n" |
| 14866 | "/// Compares each of the corresponding double-precision values of the\n" |
| 14867 | "/// 128-bit vectors of [2 x double] to determine if the values in the first\n" |
| 14868 | "/// operand are greater than those in the second operand.\n" |
| 14869 | "///\n" |
| 14870 | "/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
| 14871 | "///\n" |
| 14872 | "/// \\headerfile <x86intrin.h>\n" |
| 14873 | "///\n" |
| 14874 | "/// This intrinsic corresponds to the <c> VCMPLTPD / CMPLTPD </c> instruction.\n" |
| 14875 | "///\n" |
| 14876 | "/// \\param __a\n" |
| 14877 | "/// A 128-bit vector of [2 x double].\n" |
| 14878 | "/// \\param __b\n" |
| 14879 | "/// A 128-bit vector of [2 x double].\n" |
| 14880 | "/// \\returns A 128-bit vector containing the comparison results.\n" |
| 14881 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 14882 | "_mm_cmpgt_pd(__m128d __a, __m128d __b)\n" |
| 14883 | "{\n" |
| 14884 | " return (__m128d)__builtin_ia32_cmpltpd((__v2df)__b, (__v2df)__a);\n" |
| 14885 | "}\n" |
| 14886 | "\n" |
| 14887 | "/// Compares each of the corresponding double-precision values of the\n" |
| 14888 | "/// 128-bit vectors of [2 x double] to determine if the values in the first\n" |
| 14889 | "/// operand are greater than or equal to those in the second operand.\n" |
| 14890 | "///\n" |
| 14891 | "/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
| 14892 | "///\n" |
| 14893 | "/// \\headerfile <x86intrin.h>\n" |
| 14894 | "///\n" |
| 14895 | "/// This intrinsic corresponds to the <c> VCMPLEPD / CMPLEPD </c> instruction.\n" |
| 14896 | "///\n" |
| 14897 | "/// \\param __a\n" |
| 14898 | "/// A 128-bit vector of [2 x double].\n" |
| 14899 | "/// \\param __b\n" |
| 14900 | "/// A 128-bit vector of [2 x double].\n" |
| 14901 | "/// \\returns A 128-bit vector containing the comparison results.\n" |
| 14902 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 14903 | "_mm_cmpge_pd(__m128d __a, __m128d __b)\n" |
| 14904 | "{\n" |
| 14905 | " return (__m128d)__builtin_ia32_cmplepd((__v2df)__b, (__v2df)__a);\n" |
| 14906 | "}\n" |
| 14907 | "\n" |
| 14908 | "/// Compares each of the corresponding double-precision values of the\n" |
| 14909 | "/// 128-bit vectors of [2 x double] to determine if the values in the first\n" |
| 14910 | "/// operand are ordered with respect to those in the second operand.\n" |
| 14911 | "///\n" |
| 14912 | "/// A pair of double-precision values are \"ordered\" with respect to each\n" |
| 14913 | "/// other if neither value is a NaN. Each comparison yields 0x0 for false,\n" |
| 14914 | "/// 0xFFFFFFFFFFFFFFFF for true.\n" |
| 14915 | "///\n" |
| 14916 | "/// \\headerfile <x86intrin.h>\n" |
| 14917 | "///\n" |
| 14918 | "/// This intrinsic corresponds to the <c> VCMPORDPD / CMPORDPD </c> instruction.\n" |
| 14919 | "///\n" |
| 14920 | "/// \\param __a\n" |
| 14921 | "/// A 128-bit vector of [2 x double].\n" |
| 14922 | "/// \\param __b\n" |
| 14923 | "/// A 128-bit vector of [2 x double].\n" |
| 14924 | "/// \\returns A 128-bit vector containing the comparison results.\n" |
| 14925 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 14926 | "_mm_cmpord_pd(__m128d __a, __m128d __b)\n" |
| 14927 | "{\n" |
| 14928 | " return (__m128d)__builtin_ia32_cmpordpd((__v2df)__a, (__v2df)__b);\n" |
| 14929 | "}\n" |
| 14930 | "\n" |
| 14931 | "/// Compares each of the corresponding double-precision values of the\n" |
| 14932 | "/// 128-bit vectors of [2 x double] to determine if the values in the first\n" |
| 14933 | "/// operand are unordered with respect to those in the second operand.\n" |
| 14934 | "///\n" |
| 14935 | "/// A pair of double-precision values are \"unordered\" with respect to each\n" |
| 14936 | "/// other if one or both values are NaN. Each comparison yields 0x0 for\n" |
| 14937 | "/// false, 0xFFFFFFFFFFFFFFFF for true.\n" |
| 14938 | "///\n" |
| 14939 | "/// \\headerfile <x86intrin.h>\n" |
| 14940 | "///\n" |
| 14941 | "/// This intrinsic corresponds to the <c> VCMPUNORDPD / CMPUNORDPD </c>\n" |
| 14942 | "/// instruction.\n" |
| 14943 | "///\n" |
| 14944 | "/// \\param __a\n" |
| 14945 | "/// A 128-bit vector of [2 x double].\n" |
| 14946 | "/// \\param __b\n" |
| 14947 | "/// A 128-bit vector of [2 x double].\n" |
| 14948 | "/// \\returns A 128-bit vector containing the comparison results.\n" |
| 14949 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 14950 | "_mm_cmpunord_pd(__m128d __a, __m128d __b)\n" |
| 14951 | "{\n" |
| 14952 | " return (__m128d)__builtin_ia32_cmpunordpd((__v2df)__a, (__v2df)__b);\n" |
| 14953 | "}\n" |
| 14954 | "\n" |
| 14955 | "/// Compares each of the corresponding double-precision values of the\n" |
| 14956 | "/// 128-bit vectors of [2 x double] to determine if the values in the first\n" |
| 14957 | "/// operand are unequal to those in the second operand.\n" |
| 14958 | "///\n" |
| 14959 | "/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
| 14960 | "///\n" |
| 14961 | "/// \\headerfile <x86intrin.h>\n" |
| 14962 | "///\n" |
| 14963 | "/// This intrinsic corresponds to the <c> VCMPNEQPD / CMPNEQPD </c> instruction.\n" |
| 14964 | "///\n" |
| 14965 | "/// \\param __a\n" |
| 14966 | "/// A 128-bit vector of [2 x double].\n" |
| 14967 | "/// \\param __b\n" |
| 14968 | "/// A 128-bit vector of [2 x double].\n" |
| 14969 | "/// \\returns A 128-bit vector containing the comparison results.\n" |
| 14970 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 14971 | "_mm_cmpneq_pd(__m128d __a, __m128d __b)\n" |
| 14972 | "{\n" |
| 14973 | " return (__m128d)__builtin_ia32_cmpneqpd((__v2df)__a, (__v2df)__b);\n" |
| 14974 | "}\n" |
| 14975 | "\n" |
| 14976 | "/// Compares each of the corresponding double-precision values of the\n" |
| 14977 | "/// 128-bit vectors of [2 x double] to determine if the values in the first\n" |
| 14978 | "/// operand are not less than those in the second operand.\n" |
| 14979 | "///\n" |
| 14980 | "/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
| 14981 | "///\n" |
| 14982 | "/// \\headerfile <x86intrin.h>\n" |
| 14983 | "///\n" |
| 14984 | "/// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction.\n" |
| 14985 | "///\n" |
| 14986 | "/// \\param __a\n" |
| 14987 | "/// A 128-bit vector of [2 x double].\n" |
| 14988 | "/// \\param __b\n" |
| 14989 | "/// A 128-bit vector of [2 x double].\n" |
| 14990 | "/// \\returns A 128-bit vector containing the comparison results.\n" |
| 14991 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 14992 | "_mm_cmpnlt_pd(__m128d __a, __m128d __b)\n" |
| 14993 | "{\n" |
| 14994 | " return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__a, (__v2df)__b);\n" |
| 14995 | "}\n" |
| 14996 | "\n" |
| 14997 | "/// Compares each of the corresponding double-precision values of the\n" |
| 14998 | "/// 128-bit vectors of [2 x double] to determine if the values in the first\n" |
| 14999 | "/// operand are not less than or equal to those in the second operand.\n" |
| 15000 | "///\n" |
| 15001 | "/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
| 15002 | "///\n" |
| 15003 | "/// \\headerfile <x86intrin.h>\n" |
| 15004 | "///\n" |
| 15005 | "/// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction.\n" |
| 15006 | "///\n" |
| 15007 | "/// \\param __a\n" |
| 15008 | "/// A 128-bit vector of [2 x double].\n" |
| 15009 | "/// \\param __b\n" |
| 15010 | "/// A 128-bit vector of [2 x double].\n" |
| 15011 | "/// \\returns A 128-bit vector containing the comparison results.\n" |
| 15012 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 15013 | "_mm_cmpnle_pd(__m128d __a, __m128d __b)\n" |
| 15014 | "{\n" |
| 15015 | " return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__a, (__v2df)__b);\n" |
| 15016 | "}\n" |
| 15017 | "\n" |
| 15018 | "/// Compares each of the corresponding double-precision values of the\n" |
| 15019 | "/// 128-bit vectors of [2 x double] to determine if the values in the first\n" |
| 15020 | "/// operand are not greater than those in the second operand.\n" |
| 15021 | "///\n" |
| 15022 | "/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
| 15023 | "///\n" |
| 15024 | "/// \\headerfile <x86intrin.h>\n" |
| 15025 | "///\n" |
| 15026 | "/// This intrinsic corresponds to the <c> VCMPNLTPD / CMPNLTPD </c> instruction.\n" |
| 15027 | "///\n" |
| 15028 | "/// \\param __a\n" |
| 15029 | "/// A 128-bit vector of [2 x double].\n" |
| 15030 | "/// \\param __b\n" |
| 15031 | "/// A 128-bit vector of [2 x double].\n" |
| 15032 | "/// \\returns A 128-bit vector containing the comparison results.\n" |
| 15033 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 15034 | "_mm_cmpngt_pd(__m128d __a, __m128d __b)\n" |
| 15035 | "{\n" |
| 15036 | " return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__b, (__v2df)__a);\n" |
| 15037 | "}\n" |
| 15038 | "\n" |
| 15039 | "/// Compares each of the corresponding double-precision values of the\n" |
| 15040 | "/// 128-bit vectors of [2 x double] to determine if the values in the first\n" |
| 15041 | "/// operand are not greater than or equal to those in the second operand.\n" |
| 15042 | "///\n" |
| 15043 | "/// Each comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
| 15044 | "///\n" |
| 15045 | "/// \\headerfile <x86intrin.h>\n" |
| 15046 | "///\n" |
| 15047 | "/// This intrinsic corresponds to the <c> VCMPNLEPD / CMPNLEPD </c> instruction.\n" |
| 15048 | "///\n" |
| 15049 | "/// \\param __a\n" |
| 15050 | "/// A 128-bit vector of [2 x double].\n" |
| 15051 | "/// \\param __b\n" |
| 15052 | "/// A 128-bit vector of [2 x double].\n" |
| 15053 | "/// \\returns A 128-bit vector containing the comparison results.\n" |
| 15054 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 15055 | "_mm_cmpnge_pd(__m128d __a, __m128d __b)\n" |
| 15056 | "{\n" |
| 15057 | " return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__b, (__v2df)__a);\n" |
| 15058 | "}\n" |
| 15059 | "\n" |
| 15060 | "/// Compares the lower double-precision floating-point values in each of\n" |
| 15061 | "/// the two 128-bit floating-point vectors of [2 x double] for equality.\n" |
| 15062 | "///\n" |
| 15063 | "/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
| 15064 | "///\n" |
| 15065 | "/// \\headerfile <x86intrin.h>\n" |
| 15066 | "///\n" |
| 15067 | "/// This intrinsic corresponds to the <c> VCMPEQSD / CMPEQSD </c> instruction.\n" |
| 15068 | "///\n" |
| 15069 | "/// \\param __a\n" |
| 15070 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15071 | "/// compared to the lower double-precision value of \\a __b.\n" |
| 15072 | "/// \\param __b\n" |
| 15073 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15074 | "/// compared to the lower double-precision value of \\a __a.\n" |
| 15075 | "/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n" |
| 15076 | "/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n" |
| 15077 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 15078 | "_mm_cmpeq_sd(__m128d __a, __m128d __b)\n" |
| 15079 | "{\n" |
| 15080 | " return (__m128d)__builtin_ia32_cmpeqsd((__v2df)__a, (__v2df)__b);\n" |
| 15081 | "}\n" |
| 15082 | "\n" |
| 15083 | "/// Compares the lower double-precision floating-point values in each of\n" |
| 15084 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
| 15085 | "/// the value in the first parameter is less than the corresponding value in\n" |
| 15086 | "/// the second parameter.\n" |
| 15087 | "///\n" |
| 15088 | "/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
| 15089 | "///\n" |
| 15090 | "/// \\headerfile <x86intrin.h>\n" |
| 15091 | "///\n" |
| 15092 | "/// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction.\n" |
| 15093 | "///\n" |
| 15094 | "/// \\param __a\n" |
| 15095 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15096 | "/// compared to the lower double-precision value of \\a __b.\n" |
| 15097 | "/// \\param __b\n" |
| 15098 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15099 | "/// compared to the lower double-precision value of \\a __a.\n" |
| 15100 | "/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n" |
| 15101 | "/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n" |
| 15102 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 15103 | "_mm_cmplt_sd(__m128d __a, __m128d __b)\n" |
| 15104 | "{\n" |
| 15105 | " return (__m128d)__builtin_ia32_cmpltsd((__v2df)__a, (__v2df)__b);\n" |
| 15106 | "}\n" |
| 15107 | "\n" |
| 15108 | "/// Compares the lower double-precision floating-point values in each of\n" |
| 15109 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
| 15110 | "/// the value in the first parameter is less than or equal to the\n" |
| 15111 | "/// corresponding value in the second parameter.\n" |
| 15112 | "///\n" |
| 15113 | "/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
| 15114 | "///\n" |
| 15115 | "/// \\headerfile <x86intrin.h>\n" |
| 15116 | "///\n" |
| 15117 | "/// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction.\n" |
| 15118 | "///\n" |
| 15119 | "/// \\param __a\n" |
| 15120 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15121 | "/// compared to the lower double-precision value of \\a __b.\n" |
| 15122 | "/// \\param __b\n" |
| 15123 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15124 | "/// compared to the lower double-precision value of \\a __a.\n" |
| 15125 | "/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n" |
| 15126 | "/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n" |
| 15127 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 15128 | "_mm_cmple_sd(__m128d __a, __m128d __b)\n" |
| 15129 | "{\n" |
| 15130 | " return (__m128d)__builtin_ia32_cmplesd((__v2df)__a, (__v2df)__b);\n" |
| 15131 | "}\n" |
| 15132 | "\n" |
| 15133 | "/// Compares the lower double-precision floating-point values in each of\n" |
| 15134 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
| 15135 | "/// the value in the first parameter is greater than the corresponding value\n" |
| 15136 | "/// in the second parameter.\n" |
| 15137 | "///\n" |
| 15138 | "/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
| 15139 | "///\n" |
| 15140 | "/// \\headerfile <x86intrin.h>\n" |
| 15141 | "///\n" |
| 15142 | "/// This intrinsic corresponds to the <c> VCMPLTSD / CMPLTSD </c> instruction.\n" |
| 15143 | "///\n" |
| 15144 | "/// \\param __a\n" |
| 15145 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15146 | "/// compared to the lower double-precision value of \\a __b.\n" |
| 15147 | "/// \\param __b\n" |
| 15148 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15149 | "/// compared to the lower double-precision value of \\a __a.\n" |
| 15150 | "/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n" |
| 15151 | "/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n" |
| 15152 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 15153 | "_mm_cmpgt_sd(__m128d __a, __m128d __b)\n" |
| 15154 | "{\n" |
| 15155 | " __m128d __c = __builtin_ia32_cmpltsd((__v2df)__b, (__v2df)__a);\n" |
| 15156 | " return __extension__ (__m128d) { __c[0], __a[1] };\n" |
| 15157 | "}\n" |
| 15158 | "\n" |
| 15159 | "/// Compares the lower double-precision floating-point values in each of\n" |
| 15160 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
| 15161 | "/// the value in the first parameter is greater than or equal to the\n" |
| 15162 | "/// corresponding value in the second parameter.\n" |
| 15163 | "///\n" |
| 15164 | "/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
| 15165 | "///\n" |
| 15166 | "/// \\headerfile <x86intrin.h>\n" |
| 15167 | "///\n" |
| 15168 | "/// This intrinsic corresponds to the <c> VCMPLESD / CMPLESD </c> instruction.\n" |
| 15169 | "///\n" |
| 15170 | "/// \\param __a\n" |
| 15171 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15172 | "/// compared to the lower double-precision value of \\a __b.\n" |
| 15173 | "/// \\param __b\n" |
| 15174 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15175 | "/// compared to the lower double-precision value of \\a __a.\n" |
| 15176 | "/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n" |
| 15177 | "/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n" |
| 15178 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 15179 | "_mm_cmpge_sd(__m128d __a, __m128d __b)\n" |
| 15180 | "{\n" |
| 15181 | " __m128d __c = __builtin_ia32_cmplesd((__v2df)__b, (__v2df)__a);\n" |
| 15182 | " return __extension__ (__m128d) { __c[0], __a[1] };\n" |
| 15183 | "}\n" |
| 15184 | "\n" |
| 15185 | "/// Compares the lower double-precision floating-point values in each of\n" |
| 15186 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
| 15187 | "/// the value in the first parameter is \"ordered\" with respect to the\n" |
| 15188 | "/// corresponding value in the second parameter.\n" |
| 15189 | "///\n" |
| 15190 | "/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair\n" |
| 15191 | "/// of double-precision values are \"ordered\" with respect to each other if\n" |
| 15192 | "/// neither value is a NaN.\n" |
| 15193 | "///\n" |
| 15194 | "/// \\headerfile <x86intrin.h>\n" |
| 15195 | "///\n" |
| 15196 | "/// This intrinsic corresponds to the <c> VCMPORDSD / CMPORDSD </c> instruction.\n" |
| 15197 | "///\n" |
| 15198 | "/// \\param __a\n" |
| 15199 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15200 | "/// compared to the lower double-precision value of \\a __b.\n" |
| 15201 | "/// \\param __b\n" |
| 15202 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15203 | "/// compared to the lower double-precision value of \\a __a.\n" |
| 15204 | "/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n" |
| 15205 | "/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n" |
| 15206 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 15207 | "_mm_cmpord_sd(__m128d __a, __m128d __b)\n" |
| 15208 | "{\n" |
| 15209 | " return (__m128d)__builtin_ia32_cmpordsd((__v2df)__a, (__v2df)__b);\n" |
| 15210 | "}\n" |
| 15211 | "\n" |
| 15212 | "/// Compares the lower double-precision floating-point values in each of\n" |
| 15213 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
| 15214 | "/// the value in the first parameter is \"unordered\" with respect to the\n" |
| 15215 | "/// corresponding value in the second parameter.\n" |
| 15216 | "///\n" |
| 15217 | "/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true. A pair\n" |
| 15218 | "/// of double-precision values are \"unordered\" with respect to each other if\n" |
| 15219 | "/// one or both values are NaN.\n" |
| 15220 | "///\n" |
| 15221 | "/// \\headerfile <x86intrin.h>\n" |
| 15222 | "///\n" |
| 15223 | "/// This intrinsic corresponds to the <c> VCMPUNORDSD / CMPUNORDSD </c>\n" |
| 15224 | "/// instruction.\n" |
| 15225 | "///\n" |
| 15226 | "/// \\param __a\n" |
| 15227 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15228 | "/// compared to the lower double-precision value of \\a __b.\n" |
| 15229 | "/// \\param __b\n" |
| 15230 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15231 | "/// compared to the lower double-precision value of \\a __a.\n" |
| 15232 | "/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n" |
| 15233 | "/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n" |
| 15234 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 15235 | "_mm_cmpunord_sd(__m128d __a, __m128d __b)\n" |
| 15236 | "{\n" |
| 15237 | " return (__m128d)__builtin_ia32_cmpunordsd((__v2df)__a, (__v2df)__b);\n" |
| 15238 | "}\n" |
| 15239 | "\n" |
| 15240 | "/// Compares the lower double-precision floating-point values in each of\n" |
| 15241 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
| 15242 | "/// the value in the first parameter is unequal to the corresponding value in\n" |
| 15243 | "/// the second parameter.\n" |
| 15244 | "///\n" |
| 15245 | "/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
| 15246 | "///\n" |
| 15247 | "/// \\headerfile <x86intrin.h>\n" |
| 15248 | "///\n" |
| 15249 | "/// This intrinsic corresponds to the <c> VCMPNEQSD / CMPNEQSD </c> instruction.\n" |
| 15250 | "///\n" |
| 15251 | "/// \\param __a\n" |
| 15252 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15253 | "/// compared to the lower double-precision value of \\a __b.\n" |
| 15254 | "/// \\param __b\n" |
| 15255 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15256 | "/// compared to the lower double-precision value of \\a __a.\n" |
| 15257 | "/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n" |
| 15258 | "/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n" |
| 15259 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 15260 | "_mm_cmpneq_sd(__m128d __a, __m128d __b)\n" |
| 15261 | "{\n" |
| 15262 | " return (__m128d)__builtin_ia32_cmpneqsd((__v2df)__a, (__v2df)__b);\n" |
| 15263 | "}\n" |
| 15264 | "\n" |
| 15265 | "/// Compares the lower double-precision floating-point values in each of\n" |
| 15266 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
| 15267 | "/// the value in the first parameter is not less than the corresponding\n" |
| 15268 | "/// value in the second parameter.\n" |
| 15269 | "///\n" |
| 15270 | "/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
| 15271 | "///\n" |
| 15272 | "/// \\headerfile <x86intrin.h>\n" |
| 15273 | "///\n" |
| 15274 | "/// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction.\n" |
| 15275 | "///\n" |
| 15276 | "/// \\param __a\n" |
| 15277 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15278 | "/// compared to the lower double-precision value of \\a __b.\n" |
| 15279 | "/// \\param __b\n" |
| 15280 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15281 | "/// compared to the lower double-precision value of \\a __a.\n" |
| 15282 | "/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n" |
| 15283 | "/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n" |
| 15284 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 15285 | "_mm_cmpnlt_sd(__m128d __a, __m128d __b)\n" |
| 15286 | "{\n" |
| 15287 | " return (__m128d)__builtin_ia32_cmpnltsd((__v2df)__a, (__v2df)__b);\n" |
| 15288 | "}\n" |
| 15289 | "\n" |
| 15290 | "/// Compares the lower double-precision floating-point values in each of\n" |
| 15291 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
| 15292 | "/// the value in the first parameter is not less than or equal to the\n" |
| 15293 | "/// corresponding value in the second parameter.\n" |
| 15294 | "///\n" |
| 15295 | "/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
| 15296 | "///\n" |
| 15297 | "/// \\headerfile <x86intrin.h>\n" |
| 15298 | "///\n" |
| 15299 | "/// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction.\n" |
| 15300 | "///\n" |
| 15301 | "/// \\param __a\n" |
| 15302 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15303 | "/// compared to the lower double-precision value of \\a __b.\n" |
| 15304 | "/// \\param __b\n" |
| 15305 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15306 | "/// compared to the lower double-precision value of \\a __a.\n" |
| 15307 | "/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n" |
| 15308 | "/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n" |
| 15309 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 15310 | "_mm_cmpnle_sd(__m128d __a, __m128d __b)\n" |
| 15311 | "{\n" |
| 15312 | " return (__m128d)__builtin_ia32_cmpnlesd((__v2df)__a, (__v2df)__b);\n" |
| 15313 | "}\n" |
| 15314 | "\n" |
| 15315 | "/// Compares the lower double-precision floating-point values in each of\n" |
| 15316 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
| 15317 | "/// the value in the first parameter is not greater than the corresponding\n" |
| 15318 | "/// value in the second parameter.\n" |
| 15319 | "///\n" |
| 15320 | "/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
| 15321 | "///\n" |
| 15322 | "/// \\headerfile <x86intrin.h>\n" |
| 15323 | "///\n" |
| 15324 | "/// This intrinsic corresponds to the <c> VCMPNLTSD / CMPNLTSD </c> instruction.\n" |
| 15325 | "///\n" |
| 15326 | "/// \\param __a\n" |
| 15327 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15328 | "/// compared to the lower double-precision value of \\a __b.\n" |
| 15329 | "/// \\param __b\n" |
| 15330 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15331 | "/// compared to the lower double-precision value of \\a __a.\n" |
| 15332 | "/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n" |
| 15333 | "/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n" |
| 15334 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 15335 | "_mm_cmpngt_sd(__m128d __a, __m128d __b)\n" |
| 15336 | "{\n" |
| 15337 | " __m128d __c = __builtin_ia32_cmpnltsd((__v2df)__b, (__v2df)__a);\n" |
| 15338 | " return __extension__ (__m128d) { __c[0], __a[1] };\n" |
| 15339 | "}\n" |
| 15340 | "\n" |
| 15341 | "/// Compares the lower double-precision floating-point values in each of\n" |
| 15342 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
| 15343 | "/// the value in the first parameter is not greater than or equal to the\n" |
| 15344 | "/// corresponding value in the second parameter.\n" |
| 15345 | "///\n" |
| 15346 | "/// The comparison yields 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.\n" |
| 15347 | "///\n" |
| 15348 | "/// \\headerfile <x86intrin.h>\n" |
| 15349 | "///\n" |
| 15350 | "/// This intrinsic corresponds to the <c> VCMPNLESD / CMPNLESD </c> instruction.\n" |
| 15351 | "///\n" |
| 15352 | "/// \\param __a\n" |
| 15353 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15354 | "/// compared to the lower double-precision value of \\a __b.\n" |
| 15355 | "/// \\param __b\n" |
| 15356 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15357 | "/// compared to the lower double-precision value of \\a __a.\n" |
| 15358 | "/// \\returns A 128-bit vector. The lower 64 bits contains the comparison\n" |
| 15359 | "/// results. The upper 64 bits are copied from the upper 64 bits of \\a __a.\n" |
| 15360 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 15361 | "_mm_cmpnge_sd(__m128d __a, __m128d __b)\n" |
| 15362 | "{\n" |
| 15363 | " __m128d __c = __builtin_ia32_cmpnlesd((__v2df)__b, (__v2df)__a);\n" |
| 15364 | " return __extension__ (__m128d) { __c[0], __a[1] };\n" |
| 15365 | "}\n" |
| 15366 | "\n" |
| 15367 | "/// Compares the lower double-precision floating-point values in each of\n" |
| 15368 | "/// the two 128-bit floating-point vectors of [2 x double] for equality.\n" |
| 15369 | "///\n" |
| 15370 | "/// The comparison yields 0 for false, 1 for true. If either of the two\n" |
| 15371 | "/// lower double-precision values is NaN, 0 is returned.\n" |
| 15372 | "///\n" |
| 15373 | "/// \\headerfile <x86intrin.h>\n" |
| 15374 | "///\n" |
| 15375 | "/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.\n" |
| 15376 | "///\n" |
| 15377 | "/// \\param __a\n" |
| 15378 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15379 | "/// compared to the lower double-precision value of \\a __b.\n" |
| 15380 | "/// \\param __b\n" |
| 15381 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15382 | "/// compared to the lower double-precision value of \\a __a.\n" |
| 15383 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
| 15384 | "/// lower double-precision values is NaN, 0 is returned.\n" |
| 15385 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 15386 | "_mm_comieq_sd(__m128d __a, __m128d __b)\n" |
| 15387 | "{\n" |
| 15388 | " return __builtin_ia32_comisdeq((__v2df)__a, (__v2df)__b);\n" |
| 15389 | "}\n" |
| 15390 | "\n" |
| 15391 | "/// Compares the lower double-precision floating-point values in each of\n" |
| 15392 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
| 15393 | "/// the value in the first parameter is less than the corresponding value in\n" |
| 15394 | "/// the second parameter.\n" |
| 15395 | "///\n" |
| 15396 | "/// The comparison yields 0 for false, 1 for true. If either of the two\n" |
| 15397 | "/// lower double-precision values is NaN, 0 is returned.\n" |
| 15398 | "///\n" |
| 15399 | "/// \\headerfile <x86intrin.h>\n" |
| 15400 | "///\n" |
| 15401 | "/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.\n" |
| 15402 | "///\n" |
| 15403 | "/// \\param __a\n" |
| 15404 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15405 | "/// compared to the lower double-precision value of \\a __b.\n" |
| 15406 | "/// \\param __b\n" |
| 15407 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15408 | "/// compared to the lower double-precision value of \\a __a.\n" |
| 15409 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
| 15410 | "/// lower double-precision values is NaN, 0 is returned.\n" |
| 15411 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 15412 | "_mm_comilt_sd(__m128d __a, __m128d __b)\n" |
| 15413 | "{\n" |
| 15414 | " return __builtin_ia32_comisdlt((__v2df)__a, (__v2df)__b);\n" |
| 15415 | "}\n" |
| 15416 | "\n" |
| 15417 | "/// Compares the lower double-precision floating-point values in each of\n" |
| 15418 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
| 15419 | "/// the value in the first parameter is less than or equal to the\n" |
| 15420 | "/// corresponding value in the second parameter.\n" |
| 15421 | "///\n" |
| 15422 | "/// The comparison yields 0 for false, 1 for true. If either of the two\n" |
| 15423 | "/// lower double-precision values is NaN, 0 is returned.\n" |
| 15424 | "///\n" |
| 15425 | "/// \\headerfile <x86intrin.h>\n" |
| 15426 | "///\n" |
| 15427 | "/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.\n" |
| 15428 | "///\n" |
| 15429 | "/// \\param __a\n" |
| 15430 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15431 | "/// compared to the lower double-precision value of \\a __b.\n" |
| 15432 | "/// \\param __b\n" |
| 15433 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15434 | "/// compared to the lower double-precision value of \\a __a.\n" |
| 15435 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
| 15436 | "/// lower double-precision values is NaN, 0 is returned.\n" |
| 15437 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 15438 | "_mm_comile_sd(__m128d __a, __m128d __b)\n" |
| 15439 | "{\n" |
| 15440 | " return __builtin_ia32_comisdle((__v2df)__a, (__v2df)__b);\n" |
| 15441 | "}\n" |
| 15442 | "\n" |
| 15443 | "/// Compares the lower double-precision floating-point values in each of\n" |
| 15444 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
| 15445 | "/// the value in the first parameter is greater than the corresponding value\n" |
| 15446 | "/// in the second parameter.\n" |
| 15447 | "///\n" |
| 15448 | "/// The comparison yields 0 for false, 1 for true. If either of the two\n" |
| 15449 | "/// lower double-precision values is NaN, 0 is returned.\n" |
| 15450 | "///\n" |
| 15451 | "/// \\headerfile <x86intrin.h>\n" |
| 15452 | "///\n" |
| 15453 | "/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.\n" |
| 15454 | "///\n" |
| 15455 | "/// \\param __a\n" |
| 15456 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15457 | "/// compared to the lower double-precision value of \\a __b.\n" |
| 15458 | "/// \\param __b\n" |
| 15459 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15460 | "/// compared to the lower double-precision value of \\a __a.\n" |
| 15461 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
| 15462 | "/// lower double-precision values is NaN, 0 is returned.\n" |
| 15463 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 15464 | "_mm_comigt_sd(__m128d __a, __m128d __b)\n" |
| 15465 | "{\n" |
| 15466 | " return __builtin_ia32_comisdgt((__v2df)__a, (__v2df)__b);\n" |
| 15467 | "}\n" |
| 15468 | "\n" |
| 15469 | "/// Compares the lower double-precision floating-point values in each of\n" |
| 15470 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
| 15471 | "/// the value in the first parameter is greater than or equal to the\n" |
| 15472 | "/// corresponding value in the second parameter.\n" |
| 15473 | "///\n" |
| 15474 | "/// The comparison yields 0 for false, 1 for true. If either of the two\n" |
| 15475 | "/// lower double-precision values is NaN, 0 is returned.\n" |
| 15476 | "///\n" |
| 15477 | "/// \\headerfile <x86intrin.h>\n" |
| 15478 | "///\n" |
| 15479 | "/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.\n" |
| 15480 | "///\n" |
| 15481 | "/// \\param __a\n" |
| 15482 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15483 | "/// compared to the lower double-precision value of \\a __b.\n" |
| 15484 | "/// \\param __b\n" |
| 15485 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15486 | "/// compared to the lower double-precision value of \\a __a.\n" |
| 15487 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
| 15488 | "/// lower double-precision values is NaN, 0 is returned.\n" |
| 15489 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 15490 | "_mm_comige_sd(__m128d __a, __m128d __b)\n" |
| 15491 | "{\n" |
| 15492 | " return __builtin_ia32_comisdge((__v2df)__a, (__v2df)__b);\n" |
| 15493 | "}\n" |
| 15494 | "\n" |
| 15495 | "/// Compares the lower double-precision floating-point values in each of\n" |
| 15496 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
| 15497 | "/// the value in the first parameter is unequal to the corresponding value in\n" |
| 15498 | "/// the second parameter.\n" |
| 15499 | "///\n" |
| 15500 | "/// The comparison yields 0 for false, 1 for true. If either of the two\n" |
| 15501 | "/// lower double-precision values is NaN, 1 is returned.\n" |
| 15502 | "///\n" |
| 15503 | "/// \\headerfile <x86intrin.h>\n" |
| 15504 | "///\n" |
| 15505 | "/// This intrinsic corresponds to the <c> VCOMISD / COMISD </c> instruction.\n" |
| 15506 | "///\n" |
| 15507 | "/// \\param __a\n" |
| 15508 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15509 | "/// compared to the lower double-precision value of \\a __b.\n" |
| 15510 | "/// \\param __b\n" |
| 15511 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15512 | "/// compared to the lower double-precision value of \\a __a.\n" |
| 15513 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
| 15514 | "/// lower double-precision values is NaN, 1 is returned.\n" |
| 15515 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 15516 | "_mm_comineq_sd(__m128d __a, __m128d __b)\n" |
| 15517 | "{\n" |
| 15518 | " return __builtin_ia32_comisdneq((__v2df)__a, (__v2df)__b);\n" |
| 15519 | "}\n" |
| 15520 | "\n" |
| 15521 | "/// Compares the lower double-precision floating-point values in each of\n" |
| 15522 | "/// the two 128-bit floating-point vectors of [2 x double] for equality. The\n" |
| 15523 | "/// comparison yields 0 for false, 1 for true.\n" |
| 15524 | "///\n" |
| 15525 | "/// If either of the two lower double-precision values is NaN, 0 is returned.\n" |
| 15526 | "///\n" |
| 15527 | "/// \\headerfile <x86intrin.h>\n" |
| 15528 | "///\n" |
| 15529 | "/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.\n" |
| 15530 | "///\n" |
| 15531 | "/// \\param __a\n" |
| 15532 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15533 | "/// compared to the lower double-precision value of \\a __b.\n" |
| 15534 | "/// \\param __b\n" |
| 15535 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15536 | "/// compared to the lower double-precision value of \\a __a.\n" |
| 15537 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
| 15538 | "/// lower double-precision values is NaN, 0 is returned.\n" |
| 15539 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 15540 | "_mm_ucomieq_sd(__m128d __a, __m128d __b)\n" |
| 15541 | "{\n" |
| 15542 | " return __builtin_ia32_ucomisdeq((__v2df)__a, (__v2df)__b);\n" |
| 15543 | "}\n" |
| 15544 | "\n" |
| 15545 | "/// Compares the lower double-precision floating-point values in each of\n" |
| 15546 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
| 15547 | "/// the value in the first parameter is less than the corresponding value in\n" |
| 15548 | "/// the second parameter.\n" |
| 15549 | "///\n" |
| 15550 | "/// The comparison yields 0 for false, 1 for true. If either of the two lower\n" |
| 15551 | "/// double-precision values is NaN, 0 is returned.\n" |
| 15552 | "///\n" |
| 15553 | "/// \\headerfile <x86intrin.h>\n" |
| 15554 | "///\n" |
| 15555 | "/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.\n" |
| 15556 | "///\n" |
| 15557 | "/// \\param __a\n" |
| 15558 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15559 | "/// compared to the lower double-precision value of \\a __b.\n" |
| 15560 | "/// \\param __b\n" |
| 15561 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15562 | "/// compared to the lower double-precision value of \\a __a.\n" |
| 15563 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
| 15564 | "/// lower double-precision values is NaN, 0 is returned.\n" |
| 15565 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 15566 | "_mm_ucomilt_sd(__m128d __a, __m128d __b)\n" |
| 15567 | "{\n" |
| 15568 | " return __builtin_ia32_ucomisdlt((__v2df)__a, (__v2df)__b);\n" |
| 15569 | "}\n" |
| 15570 | "\n" |
| 15571 | "/// Compares the lower double-precision floating-point values in each of\n" |
| 15572 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
| 15573 | "/// the value in the first parameter is less than or equal to the\n" |
| 15574 | "/// corresponding value in the second parameter.\n" |
| 15575 | "///\n" |
| 15576 | "/// The comparison yields 0 for false, 1 for true. If either of the two lower\n" |
| 15577 | "/// double-precision values is NaN, 0 is returned.\n" |
| 15578 | "///\n" |
| 15579 | "/// \\headerfile <x86intrin.h>\n" |
| 15580 | "///\n" |
| 15581 | "/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.\n" |
| 15582 | "///\n" |
| 15583 | "/// \\param __a\n" |
| 15584 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15585 | "/// compared to the lower double-precision value of \\a __b.\n" |
| 15586 | "/// \\param __b\n" |
| 15587 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15588 | "/// compared to the lower double-precision value of \\a __a.\n" |
| 15589 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
| 15590 | "/// lower double-precision values is NaN, 0 is returned.\n" |
| 15591 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 15592 | "_mm_ucomile_sd(__m128d __a, __m128d __b)\n" |
| 15593 | "{\n" |
| 15594 | " return __builtin_ia32_ucomisdle((__v2df)__a, (__v2df)__b);\n" |
| 15595 | "}\n" |
| 15596 | "\n" |
| 15597 | "/// Compares the lower double-precision floating-point values in each of\n" |
| 15598 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
| 15599 | "/// the value in the first parameter is greater than the corresponding value\n" |
| 15600 | "/// in the second parameter.\n" |
| 15601 | "///\n" |
| 15602 | "/// The comparison yields 0 for false, 1 for true. If either of the two lower\n" |
| 15603 | "/// double-precision values is NaN, 0 is returned.\n" |
| 15604 | "///\n" |
| 15605 | "/// \\headerfile <x86intrin.h>\n" |
| 15606 | "///\n" |
| 15607 | "/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.\n" |
| 15608 | "///\n" |
| 15609 | "/// \\param __a\n" |
| 15610 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15611 | "/// compared to the lower double-precision value of \\a __b.\n" |
| 15612 | "/// \\param __b\n" |
| 15613 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15614 | "/// compared to the lower double-precision value of \\a __a.\n" |
| 15615 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
| 15616 | "/// lower double-precision values is NaN, 0 is returned.\n" |
| 15617 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 15618 | "_mm_ucomigt_sd(__m128d __a, __m128d __b)\n" |
| 15619 | "{\n" |
| 15620 | " return __builtin_ia32_ucomisdgt((__v2df)__a, (__v2df)__b);\n" |
| 15621 | "}\n" |
| 15622 | "\n" |
| 15623 | "/// Compares the lower double-precision floating-point values in each of\n" |
| 15624 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
| 15625 | "/// the value in the first parameter is greater than or equal to the\n" |
| 15626 | "/// corresponding value in the second parameter.\n" |
| 15627 | "///\n" |
| 15628 | "/// The comparison yields 0 for false, 1 for true. If either of the two\n" |
| 15629 | "/// lower double-precision values is NaN, 0 is returned.\n" |
| 15630 | "///\n" |
| 15631 | "/// \\headerfile <x86intrin.h>\n" |
| 15632 | "///\n" |
| 15633 | "/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.\n" |
| 15634 | "///\n" |
| 15635 | "/// \\param __a\n" |
| 15636 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15637 | "/// compared to the lower double-precision value of \\a __b.\n" |
| 15638 | "/// \\param __b\n" |
| 15639 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15640 | "/// compared to the lower double-precision value of \\a __a.\n" |
| 15641 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
| 15642 | "/// lower double-precision values is NaN, 0 is returned.\n" |
| 15643 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 15644 | "_mm_ucomige_sd(__m128d __a, __m128d __b)\n" |
| 15645 | "{\n" |
| 15646 | " return __builtin_ia32_ucomisdge((__v2df)__a, (__v2df)__b);\n" |
| 15647 | "}\n" |
| 15648 | "\n" |
| 15649 | "/// Compares the lower double-precision floating-point values in each of\n" |
| 15650 | "/// the two 128-bit floating-point vectors of [2 x double] to determine if\n" |
| 15651 | "/// the value in the first parameter is unequal to the corresponding value in\n" |
| 15652 | "/// the second parameter.\n" |
| 15653 | "///\n" |
| 15654 | "/// The comparison yields 0 for false, 1 for true. If either of the two lower\n" |
| 15655 | "/// double-precision values is NaN, 1 is returned.\n" |
| 15656 | "///\n" |
| 15657 | "/// \\headerfile <x86intrin.h>\n" |
| 15658 | "///\n" |
| 15659 | "/// This intrinsic corresponds to the <c> VUCOMISD / UCOMISD </c> instruction.\n" |
| 15660 | "///\n" |
| 15661 | "/// \\param __a\n" |
| 15662 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15663 | "/// compared to the lower double-precision value of \\a __b.\n" |
| 15664 | "/// \\param __b\n" |
| 15665 | "/// A 128-bit vector of [2 x double]. The lower double-precision value is\n" |
| 15666 | "/// compared to the lower double-precision value of \\a __a.\n" |
| 15667 | "/// \\returns An integer containing the comparison result. If either of the two\n" |
| 15668 | "/// lower double-precision values is NaN, 1 is returned.\n" |
| 15669 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 15670 | "_mm_ucomineq_sd(__m128d __a, __m128d __b)\n" |
| 15671 | "{\n" |
| 15672 | " return __builtin_ia32_ucomisdneq((__v2df)__a, (__v2df)__b);\n" |
| 15673 | "}\n" |
| 15674 | "\n" |
| 15675 | "/// Converts the two double-precision floating-point elements of a\n" |
| 15676 | "/// 128-bit vector of [2 x double] into two single-precision floating-point\n" |
| 15677 | "/// values, returned in the lower 64 bits of a 128-bit vector of [4 x float].\n" |
| 15678 | "/// The upper 64 bits of the result vector are set to zero.\n" |
| 15679 | "///\n" |
| 15680 | "/// \\headerfile <x86intrin.h>\n" |
| 15681 | "///\n" |
| 15682 | "/// This intrinsic corresponds to the <c> VCVTPD2PS / CVTPD2PS </c> instruction.\n" |
| 15683 | "///\n" |
| 15684 | "/// \\param __a\n" |
| 15685 | "/// A 128-bit vector of [2 x double].\n" |
| 15686 | "/// \\returns A 128-bit vector of [4 x float] whose lower 64 bits contain the\n" |
| 15687 | "/// converted values. The upper 64 bits are set to zero.\n" |
| 15688 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 15689 | "_mm_cvtpd_ps(__m128d __a)\n" |
| 15690 | "{\n" |
| 15691 | " return __builtin_ia32_cvtpd2ps((__v2df)__a);\n" |
| 15692 | "}\n" |
| 15693 | "\n" |
| 15694 | "/// Converts the lower two single-precision floating-point elements of a\n" |
| 15695 | "/// 128-bit vector of [4 x float] into two double-precision floating-point\n" |
| 15696 | "/// values, returned in a 128-bit vector of [2 x double]. The upper two\n" |
| 15697 | "/// elements of the input vector are unused.\n" |
| 15698 | "///\n" |
| 15699 | "/// \\headerfile <x86intrin.h>\n" |
| 15700 | "///\n" |
| 15701 | "/// This intrinsic corresponds to the <c> VCVTPS2PD / CVTPS2PD </c> instruction.\n" |
| 15702 | "///\n" |
| 15703 | "/// \\param __a\n" |
| 15704 | "/// A 128-bit vector of [4 x float]. The lower two single-precision\n" |
| 15705 | "/// floating-point elements are converted to double-precision values. The\n" |
| 15706 | "/// upper two elements are unused.\n" |
| 15707 | "/// \\returns A 128-bit vector of [2 x double] containing the converted values.\n" |
| 15708 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 15709 | "_mm_cvtps_pd(__m128 __a)\n" |
| 15710 | "{\n" |
| 15711 | " return (__m128d) __builtin_convertvector(\n" |
| 15712 | " __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df);\n" |
| 15713 | "}\n" |
| 15714 | "\n" |
| 15715 | "/// Converts the lower two integer elements of a 128-bit vector of\n" |
| 15716 | "/// [4 x i32] into two double-precision floating-point values, returned in a\n" |
| 15717 | "/// 128-bit vector of [2 x double].\n" |
| 15718 | "///\n" |
| 15719 | "/// The upper two elements of the input vector are unused.\n" |
| 15720 | "///\n" |
| 15721 | "/// \\headerfile <x86intrin.h>\n" |
| 15722 | "///\n" |
| 15723 | "/// This intrinsic corresponds to the <c> VCVTDQ2PD / CVTDQ2PD </c> instruction.\n" |
| 15724 | "///\n" |
| 15725 | "/// \\param __a\n" |
| 15726 | "/// A 128-bit integer vector of [4 x i32]. The lower two integer elements are\n" |
| 15727 | "/// converted to double-precision values.\n" |
| 15728 | "///\n" |
| 15729 | "/// The upper two elements are unused.\n" |
| 15730 | "/// \\returns A 128-bit vector of [2 x double] containing the converted values.\n" |
| 15731 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 15732 | "_mm_cvtepi32_pd(__m128i __a)\n" |
| 15733 | "{\n" |
| 15734 | " return (__m128d) __builtin_convertvector(\n" |
| 15735 | " __builtin_shufflevector((__v4si)__a, (__v4si)__a, 0, 1), __v2df);\n" |
| 15736 | "}\n" |
| 15737 | "\n" |
| 15738 | "/// Converts the two double-precision floating-point elements of a\n" |
| 15739 | "/// 128-bit vector of [2 x double] into two signed 32-bit integer values,\n" |
| 15740 | "/// returned in the lower 64 bits of a 128-bit vector of [4 x i32]. The upper\n" |
| 15741 | "/// 64 bits of the result vector are set to zero.\n" |
| 15742 | "///\n" |
| 15743 | "/// \\headerfile <x86intrin.h>\n" |
| 15744 | "///\n" |
| 15745 | "/// This intrinsic corresponds to the <c> VCVTPD2DQ / CVTPD2DQ </c> instruction.\n" |
| 15746 | "///\n" |
| 15747 | "/// \\param __a\n" |
| 15748 | "/// A 128-bit vector of [2 x double].\n" |
| 15749 | "/// \\returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the\n" |
| 15750 | "/// converted values. The upper 64 bits are set to zero.\n" |
| 15751 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 15752 | "_mm_cvtpd_epi32(__m128d __a)\n" |
| 15753 | "{\n" |
| 15754 | " return __builtin_ia32_cvtpd2dq((__v2df)__a);\n" |
| 15755 | "}\n" |
| 15756 | "\n" |
| 15757 | "/// Converts the low-order element of a 128-bit vector of [2 x double]\n" |
| 15758 | "/// into a 32-bit signed integer value.\n" |
| 15759 | "///\n" |
| 15760 | "/// \\headerfile <x86intrin.h>\n" |
| 15761 | "///\n" |
| 15762 | "/// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction.\n" |
| 15763 | "///\n" |
| 15764 | "/// \\param __a\n" |
| 15765 | "/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the\n" |
| 15766 | "/// conversion.\n" |
| 15767 | "/// \\returns A 32-bit signed integer containing the converted value.\n" |
| 15768 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 15769 | "_mm_cvtsd_si32(__m128d __a)\n" |
| 15770 | "{\n" |
| 15771 | " return __builtin_ia32_cvtsd2si((__v2df)__a);\n" |
| 15772 | "}\n" |
| 15773 | "\n" |
| 15774 | "/// Converts the lower double-precision floating-point element of a\n" |
| 15775 | "/// 128-bit vector of [2 x double], in the second parameter, into a\n" |
| 15776 | "/// single-precision floating-point value, returned in the lower 32 bits of a\n" |
| 15777 | "/// 128-bit vector of [4 x float]. The upper 96 bits of the result vector are\n" |
| 15778 | "/// copied from the upper 96 bits of the first parameter.\n" |
| 15779 | "///\n" |
| 15780 | "/// \\headerfile <x86intrin.h>\n" |
| 15781 | "///\n" |
| 15782 | "/// This intrinsic corresponds to the <c> VCVTSD2SS / CVTSD2SS </c> instruction.\n" |
| 15783 | "///\n" |
| 15784 | "/// \\param __a\n" |
| 15785 | "/// A 128-bit vector of [4 x float]. The upper 96 bits of this parameter are\n" |
| 15786 | "/// copied to the upper 96 bits of the result.\n" |
| 15787 | "/// \\param __b\n" |
| 15788 | "/// A 128-bit vector of [2 x double]. The lower double-precision\n" |
| 15789 | "/// floating-point element is used in the conversion.\n" |
| 15790 | "/// \\returns A 128-bit vector of [4 x float]. The lower 32 bits contain the\n" |
| 15791 | "/// converted value from the second parameter. The upper 96 bits are copied\n" |
| 15792 | "/// from the upper 96 bits of the first parameter.\n" |
| 15793 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 15794 | "_mm_cvtsd_ss(__m128 __a, __m128d __b)\n" |
| 15795 | "{\n" |
| 15796 | " return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b);\n" |
| 15797 | "}\n" |
| 15798 | "\n" |
| 15799 | "/// Converts a 32-bit signed integer value, in the second parameter, into\n" |
| 15800 | "/// a double-precision floating-point value, returned in the lower 64 bits of\n" |
| 15801 | "/// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector\n" |
| 15802 | "/// are copied from the upper 64 bits of the first parameter.\n" |
| 15803 | "///\n" |
| 15804 | "/// \\headerfile <x86intrin.h>\n" |
| 15805 | "///\n" |
| 15806 | "/// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction.\n" |
| 15807 | "///\n" |
| 15808 | "/// \\param __a\n" |
| 15809 | "/// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are\n" |
| 15810 | "/// copied to the upper 64 bits of the result.\n" |
| 15811 | "/// \\param __b\n" |
| 15812 | "/// A 32-bit signed integer containing the value to be converted.\n" |
| 15813 | "/// \\returns A 128-bit vector of [2 x double]. The lower 64 bits contain the\n" |
| 15814 | "/// converted value from the second parameter. The upper 64 bits are copied\n" |
| 15815 | "/// from the upper 64 bits of the first parameter.\n" |
| 15816 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 15817 | "_mm_cvtsi32_sd(__m128d __a, int __b)\n" |
| 15818 | "{\n" |
| 15819 | " __a[0] = __b;\n" |
| 15820 | " return __a;\n" |
| 15821 | "}\n" |
| 15822 | "\n" |
| 15823 | "/// Converts the lower single-precision floating-point element of a\n" |
| 15824 | "/// 128-bit vector of [4 x float], in the second parameter, into a\n" |
| 15825 | "/// double-precision floating-point value, returned in the lower 64 bits of\n" |
| 15826 | "/// a 128-bit vector of [2 x double]. The upper 64 bits of the result vector\n" |
| 15827 | "/// are copied from the upper 64 bits of the first parameter.\n" |
| 15828 | "///\n" |
| 15829 | "/// \\headerfile <x86intrin.h>\n" |
| 15830 | "///\n" |
| 15831 | "/// This intrinsic corresponds to the <c> VCVTSS2SD / CVTSS2SD </c> instruction.\n" |
| 15832 | "///\n" |
| 15833 | "/// \\param __a\n" |
| 15834 | "/// A 128-bit vector of [2 x double]. The upper 64 bits of this parameter are\n" |
| 15835 | "/// copied to the upper 64 bits of the result.\n" |
| 15836 | "/// \\param __b\n" |
| 15837 | "/// A 128-bit vector of [4 x float]. The lower single-precision\n" |
| 15838 | "/// floating-point element is used in the conversion.\n" |
| 15839 | "/// \\returns A 128-bit vector of [2 x double]. The lower 64 bits contain the\n" |
| 15840 | "/// converted value from the second parameter. The upper 64 bits are copied\n" |
| 15841 | "/// from the upper 64 bits of the first parameter.\n" |
| 15842 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 15843 | "_mm_cvtss_sd(__m128d __a, __m128 __b)\n" |
| 15844 | "{\n" |
| 15845 | " __a[0] = __b[0];\n" |
| 15846 | " return __a;\n" |
| 15847 | "}\n" |
| 15848 | "\n" |
| 15849 | "/// Converts the two double-precision floating-point elements of a\n" |
| 15850 | "/// 128-bit vector of [2 x double] into two signed 32-bit integer values,\n" |
| 15851 | "/// returned in the lower 64 bits of a 128-bit vector of [4 x i32].\n" |
| 15852 | "///\n" |
| 15853 | "/// If the result of either conversion is inexact, the result is truncated\n" |
| 15854 | "/// (rounded towards zero) regardless of the current MXCSR setting. The upper\n" |
| 15855 | "/// 64 bits of the result vector are set to zero.\n" |
| 15856 | "///\n" |
| 15857 | "/// \\headerfile <x86intrin.h>\n" |
| 15858 | "///\n" |
| 15859 | "/// This intrinsic corresponds to the <c> VCVTTPD2DQ / CVTTPD2DQ </c>\n" |
| 15860 | "/// instruction.\n" |
| 15861 | "///\n" |
| 15862 | "/// \\param __a\n" |
| 15863 | "/// A 128-bit vector of [2 x double].\n" |
| 15864 | "/// \\returns A 128-bit vector of [4 x i32] whose lower 64 bits contain the\n" |
| 15865 | "/// converted values. The upper 64 bits are set to zero.\n" |
| 15866 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 15867 | "_mm_cvttpd_epi32(__m128d __a)\n" |
| 15868 | "{\n" |
| 15869 | " return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)__a);\n" |
| 15870 | "}\n" |
| 15871 | "\n" |
| 15872 | "/// Converts the low-order element of a [2 x double] vector into a 32-bit\n" |
| 15873 | "/// signed integer value, truncating the result when it is inexact.\n" |
| 15874 | "///\n" |
| 15875 | "/// \\headerfile <x86intrin.h>\n" |
| 15876 | "///\n" |
| 15877 | "/// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c>\n" |
| 15878 | "/// instruction.\n" |
| 15879 | "///\n" |
| 15880 | "/// \\param __a\n" |
| 15881 | "/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the\n" |
| 15882 | "/// conversion.\n" |
| 15883 | "/// \\returns A 32-bit signed integer containing the converted value.\n" |
| 15884 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 15885 | "_mm_cvttsd_si32(__m128d __a)\n" |
| 15886 | "{\n" |
| 15887 | " return __builtin_ia32_cvttsd2si((__v2df)__a);\n" |
| 15888 | "}\n" |
| 15889 | "\n" |
| 15890 | "/// Converts the two double-precision floating-point elements of a\n" |
| 15891 | "/// 128-bit vector of [2 x double] into two signed 32-bit integer values,\n" |
| 15892 | "/// returned in a 64-bit vector of [2 x i32].\n" |
| 15893 | "///\n" |
| 15894 | "/// \\headerfile <x86intrin.h>\n" |
| 15895 | "///\n" |
| 15896 | "/// This intrinsic corresponds to the <c> CVTPD2PI </c> instruction.\n" |
| 15897 | "///\n" |
| 15898 | "/// \\param __a\n" |
| 15899 | "/// A 128-bit vector of [2 x double].\n" |
| 15900 | "/// \\returns A 64-bit vector of [2 x i32] containing the converted values.\n" |
| 15901 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 15902 | "_mm_cvtpd_pi32(__m128d __a)\n" |
| 15903 | "{\n" |
| 15904 | " return (__m64)__builtin_ia32_cvtpd2pi((__v2df)__a);\n" |
| 15905 | "}\n" |
| 15906 | "\n" |
| 15907 | "/// Converts the two double-precision floating-point elements of a\n" |
| 15908 | "/// 128-bit vector of [2 x double] into two signed 32-bit integer values,\n" |
| 15909 | "/// returned in a 64-bit vector of [2 x i32].\n" |
| 15910 | "///\n" |
| 15911 | "/// If the result of either conversion is inexact, the result is truncated\n" |
| 15912 | "/// (rounded towards zero) regardless of the current MXCSR setting.\n" |
| 15913 | "///\n" |
| 15914 | "/// \\headerfile <x86intrin.h>\n" |
| 15915 | "///\n" |
| 15916 | "/// This intrinsic corresponds to the <c> CVTTPD2PI </c> instruction.\n" |
| 15917 | "///\n" |
| 15918 | "/// \\param __a\n" |
| 15919 | "/// A 128-bit vector of [2 x double].\n" |
| 15920 | "/// \\returns A 64-bit vector of [2 x i32] containing the converted values.\n" |
| 15921 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 15922 | "_mm_cvttpd_pi32(__m128d __a)\n" |
| 15923 | "{\n" |
| 15924 | " return (__m64)__builtin_ia32_cvttpd2pi((__v2df)__a);\n" |
| 15925 | "}\n" |
| 15926 | "\n" |
| 15927 | "/// Converts the two signed 32-bit integer elements of a 64-bit vector of\n" |
| 15928 | "/// [2 x i32] into two double-precision floating-point values, returned in a\n" |
| 15929 | "/// 128-bit vector of [2 x double].\n" |
| 15930 | "///\n" |
| 15931 | "/// \\headerfile <x86intrin.h>\n" |
| 15932 | "///\n" |
| 15933 | "/// This intrinsic corresponds to the <c> CVTPI2PD </c> instruction.\n" |
| 15934 | "///\n" |
| 15935 | "/// \\param __a\n" |
| 15936 | "/// A 64-bit vector of [2 x i32].\n" |
| 15937 | "/// \\returns A 128-bit vector of [2 x double] containing the converted values.\n" |
| 15938 | "static __inline__ __m128d __DEFAULT_FN_ATTRS_MMX\n" |
| 15939 | "_mm_cvtpi32_pd(__m64 __a)\n" |
| 15940 | "{\n" |
| 15941 | " return __builtin_ia32_cvtpi2pd((__v2si)__a);\n" |
| 15942 | "}\n" |
| 15943 | "\n" |
| 15944 | "/// Returns the low-order element of a 128-bit vector of [2 x double] as\n" |
| 15945 | "/// a double-precision floating-point value.\n" |
| 15946 | "///\n" |
| 15947 | "/// \\headerfile <x86intrin.h>\n" |
| 15948 | "///\n" |
| 15949 | "/// This intrinsic has no corresponding instruction.\n" |
| 15950 | "///\n" |
| 15951 | "/// \\param __a\n" |
| 15952 | "/// A 128-bit vector of [2 x double]. The lower 64 bits are returned.\n" |
| 15953 | "/// \\returns A double-precision floating-point value copied from the lower 64\n" |
| 15954 | "/// bits of \\a __a.\n" |
| 15955 | "static __inline__ double __DEFAULT_FN_ATTRS\n" |
| 15956 | "_mm_cvtsd_f64(__m128d __a)\n" |
| 15957 | "{\n" |
| 15958 | " return __a[0];\n" |
| 15959 | "}\n" |
| 15960 | "\n" |
| 15961 | "/// Loads a 128-bit floating-point vector of [2 x double] from an aligned\n" |
| 15962 | "/// memory location.\n" |
| 15963 | "///\n" |
| 15964 | "/// \\headerfile <x86intrin.h>\n" |
| 15965 | "///\n" |
| 15966 | "/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction.\n" |
| 15967 | "///\n" |
| 15968 | "/// \\param __dp\n" |
| 15969 | "/// A pointer to a 128-bit memory location. The address of the memory\n" |
| 15970 | "/// location has to be 16-byte aligned.\n" |
| 15971 | "/// \\returns A 128-bit vector of [2 x double] containing the loaded values.\n" |
| 15972 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 15973 | "_mm_load_pd(double const *__dp)\n" |
| 15974 | "{\n" |
| 15975 | " return *(__m128d*)__dp;\n" |
| 15976 | "}\n" |
| 15977 | "\n" |
| 15978 | "/// Loads a double-precision floating-point value from a specified memory\n" |
| 15979 | "/// location and duplicates it to both vector elements of a 128-bit vector of\n" |
| 15980 | "/// [2 x double].\n" |
| 15981 | "///\n" |
| 15982 | "/// \\headerfile <x86intrin.h>\n" |
| 15983 | "///\n" |
| 15984 | "/// This intrinsic corresponds to the <c> VMOVDDUP / MOVDDUP </c> instruction.\n" |
| 15985 | "///\n" |
| 15986 | "/// \\param __dp\n" |
| 15987 | "/// A pointer to a memory location containing a double-precision value.\n" |
| 15988 | "/// \\returns A 128-bit vector of [2 x double] containing the loaded and\n" |
| 15989 | "/// duplicated values.\n" |
| 15990 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 15991 | "_mm_load1_pd(double const *__dp)\n" |
| 15992 | "{\n" |
| 15993 | " struct __mm_load1_pd_struct {\n" |
| 15994 | " double __u;\n" |
| 15995 | " } __attribute__((__packed__, __may_alias__));\n" |
| 15996 | " double __u = ((struct __mm_load1_pd_struct*)__dp)->__u;\n" |
| 15997 | " return __extension__ (__m128d){ __u, __u };\n" |
| 15998 | "}\n" |
| 15999 | "\n" |
| 16000 | "#define _mm_load_pd1(dp) _mm_load1_pd(dp)\n" |
| 16001 | "\n" |
| 16002 | "/// Loads two double-precision values, in reverse order, from an aligned\n" |
| 16003 | "/// memory location into a 128-bit vector of [2 x double].\n" |
| 16004 | "///\n" |
| 16005 | "/// \\headerfile <x86intrin.h>\n" |
| 16006 | "///\n" |
| 16007 | "/// This intrinsic corresponds to the <c> VMOVAPD / MOVAPD </c> instruction +\n" |
| 16008 | "/// needed shuffling instructions. In AVX mode, the shuffling may be combined\n" |
| 16009 | "/// with the \\c VMOVAPD, resulting in only a \\c VPERMILPD instruction.\n" |
| 16010 | "///\n" |
| 16011 | "/// \\param __dp\n" |
| 16012 | "/// A 16-byte aligned pointer to an array of double-precision values to be\n" |
| 16013 | "/// loaded in reverse order.\n" |
| 16014 | "/// \\returns A 128-bit vector of [2 x double] containing the reversed loaded\n" |
| 16015 | "/// values.\n" |
| 16016 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 16017 | "_mm_loadr_pd(double const *__dp)\n" |
| 16018 | "{\n" |
| 16019 | " __m128d __u = *(__m128d*)__dp;\n" |
| 16020 | " return __builtin_shufflevector((__v2df)__u, (__v2df)__u, 1, 0);\n" |
| 16021 | "}\n" |
| 16022 | "\n" |
| 16023 | "/// Loads a 128-bit floating-point vector of [2 x double] from an\n" |
| 16024 | "/// unaligned memory location.\n" |
| 16025 | "///\n" |
| 16026 | "/// \\headerfile <x86intrin.h>\n" |
| 16027 | "///\n" |
| 16028 | "/// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction.\n" |
| 16029 | "///\n" |
| 16030 | "/// \\param __dp\n" |
| 16031 | "/// A pointer to a 128-bit memory location. The address of the memory\n" |
| 16032 | "/// location does not have to be aligned.\n" |
| 16033 | "/// \\returns A 128-bit vector of [2 x double] containing the loaded values.\n" |
| 16034 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 16035 | "_mm_loadu_pd(double const *__dp)\n" |
| 16036 | "{\n" |
| 16037 | " struct __loadu_pd {\n" |
| 16038 | " __m128d __v;\n" |
| 16039 | " } __attribute__((__packed__, __may_alias__));\n" |
| 16040 | " return ((struct __loadu_pd*)__dp)->__v;\n" |
| 16041 | "}\n" |
| 16042 | "\n" |
| 16043 | "/// Loads a 64-bit integer value to the low element of a 128-bit integer\n" |
| 16044 | "/// vector and clears the upper element.\n" |
| 16045 | "///\n" |
| 16046 | "/// \\headerfile <x86intrin.h>\n" |
| 16047 | "///\n" |
| 16048 | "/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n" |
| 16049 | "///\n" |
| 16050 | "/// \\param __a\n" |
| 16051 | "/// A pointer to a 64-bit memory location. The address of the memory\n" |
| 16052 | "/// location does not have to be aligned.\n" |
| 16053 | "/// \\returns A 128-bit vector of [2 x i64] containing the loaded value.\n" |
| 16054 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 16055 | "_mm_loadu_si64(void const *__a)\n" |
| 16056 | "{\n" |
| 16057 | " struct __loadu_si64 {\n" |
| 16058 | " long long __v;\n" |
| 16059 | " } __attribute__((__packed__, __may_alias__));\n" |
| 16060 | " long long __u = ((struct __loadu_si64*)__a)->__v;\n" |
| 16061 | " return __extension__ (__m128i)(__v2di){__u, 0L};\n" |
| 16062 | "}\n" |
| 16063 | "\n" |
| 16064 | "/// Loads a 64-bit double-precision value to the low element of a\n" |
| 16065 | "/// 128-bit integer vector and clears the upper element.\n" |
| 16066 | "///\n" |
| 16067 | "/// \\headerfile <x86intrin.h>\n" |
| 16068 | "///\n" |
| 16069 | "/// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction.\n" |
| 16070 | "///\n" |
| 16071 | "/// \\param __dp\n" |
| 16072 | "/// A pointer to a memory location containing a double-precision value.\n" |
| 16073 | "/// The address of the memory location does not have to be aligned.\n" |
| 16074 | "/// \\returns A 128-bit vector of [2 x double] containing the loaded value.\n" |
| 16075 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 16076 | "_mm_load_sd(double const *__dp)\n" |
| 16077 | "{\n" |
| 16078 | " struct __mm_load_sd_struct {\n" |
| 16079 | " double __u;\n" |
| 16080 | " } __attribute__((__packed__, __may_alias__));\n" |
| 16081 | " double __u = ((struct __mm_load_sd_struct*)__dp)->__u;\n" |
| 16082 | " return __extension__ (__m128d){ __u, 0 };\n" |
| 16083 | "}\n" |
| 16084 | "\n" |
| 16085 | "/// Loads a double-precision value into the high-order bits of a 128-bit\n" |
| 16086 | "/// vector of [2 x double]. The low-order bits are copied from the low-order\n" |
| 16087 | "/// bits of the first operand.\n" |
| 16088 | "///\n" |
| 16089 | "/// \\headerfile <x86intrin.h>\n" |
| 16090 | "///\n" |
| 16091 | "/// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction.\n" |
| 16092 | "///\n" |
| 16093 | "/// \\param __a\n" |
| 16094 | "/// A 128-bit vector of [2 x double]. \\n\n" |
| 16095 | "/// Bits [63:0] are written to bits [63:0] of the result.\n" |
| 16096 | "/// \\param __dp\n" |
| 16097 | "/// A pointer to a 64-bit memory location containing a double-precision\n" |
| 16098 | "/// floating-point value that is loaded. The loaded value is written to bits\n" |
| 16099 | "/// [127:64] of the result. The address of the memory location does not have\n" |
| 16100 | "/// to be aligned.\n" |
| 16101 | "/// \\returns A 128-bit vector of [2 x double] containing the moved values.\n" |
| 16102 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 16103 | "_mm_loadh_pd(__m128d __a, double const *__dp)\n" |
| 16104 | "{\n" |
| 16105 | " struct __mm_loadh_pd_struct {\n" |
| 16106 | " double __u;\n" |
| 16107 | " } __attribute__((__packed__, __may_alias__));\n" |
| 16108 | " double __u = ((struct __mm_loadh_pd_struct*)__dp)->__u;\n" |
| 16109 | " return __extension__ (__m128d){ __a[0], __u };\n" |
| 16110 | "}\n" |
| 16111 | "\n" |
| 16112 | "/// Loads a double-precision value into the low-order bits of a 128-bit\n" |
| 16113 | "/// vector of [2 x double]. The high-order bits are copied from the\n" |
| 16114 | "/// high-order bits of the first operand.\n" |
| 16115 | "///\n" |
| 16116 | "/// \\headerfile <x86intrin.h>\n" |
| 16117 | "///\n" |
| 16118 | "/// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction.\n" |
| 16119 | "///\n" |
| 16120 | "/// \\param __a\n" |
| 16121 | "/// A 128-bit vector of [2 x double]. \\n\n" |
| 16122 | "/// Bits [127:64] are written to bits [127:64] of the result.\n" |
| 16123 | "/// \\param __dp\n" |
| 16124 | "/// A pointer to a 64-bit memory location containing a double-precision\n" |
| 16125 | "/// floating-point value that is loaded. The loaded value is written to bits\n" |
| 16126 | "/// [63:0] of the result. The address of the memory location does not have to\n" |
| 16127 | "/// be aligned.\n" |
| 16128 | "/// \\returns A 128-bit vector of [2 x double] containing the moved values.\n" |
| 16129 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 16130 | "_mm_loadl_pd(__m128d __a, double const *__dp)\n" |
| 16131 | "{\n" |
| 16132 | " struct __mm_loadl_pd_struct {\n" |
| 16133 | " double __u;\n" |
| 16134 | " } __attribute__((__packed__, __may_alias__));\n" |
| 16135 | " double __u = ((struct __mm_loadl_pd_struct*)__dp)->__u;\n" |
| 16136 | " return __extension__ (__m128d){ __u, __a[1] };\n" |
| 16137 | "}\n" |
| 16138 | "\n" |
| 16139 | "/// Constructs a 128-bit floating-point vector of [2 x double] with\n" |
| 16140 | "/// unspecified content. This could be used as an argument to another\n" |
| 16141 | "/// intrinsic function where the argument is required but the value is not\n" |
| 16142 | "/// actually used.\n" |
| 16143 | "///\n" |
| 16144 | "/// \\headerfile <x86intrin.h>\n" |
| 16145 | "///\n" |
| 16146 | "/// This intrinsic has no corresponding instruction.\n" |
| 16147 | "///\n" |
| 16148 | "/// \\returns A 128-bit floating-point vector of [2 x double] with unspecified\n" |
| 16149 | "/// content.\n" |
| 16150 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 16151 | "_mm_undefined_pd(void)\n" |
| 16152 | "{\n" |
| 16153 | " return (__m128d)__builtin_ia32_undef128();\n" |
| 16154 | "}\n" |
| 16155 | "\n" |
| 16156 | "/// Constructs a 128-bit floating-point vector of [2 x double]. The lower\n" |
| 16157 | "/// 64 bits of the vector are initialized with the specified double-precision\n" |
| 16158 | "/// floating-point value. The upper 64 bits are set to zero.\n" |
| 16159 | "///\n" |
| 16160 | "/// \\headerfile <x86intrin.h>\n" |
| 16161 | "///\n" |
| 16162 | "/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n" |
| 16163 | "///\n" |
| 16164 | "/// \\param __w\n" |
| 16165 | "/// A double-precision floating-point value used to initialize the lower 64\n" |
| 16166 | "/// bits of the result.\n" |
| 16167 | "/// \\returns An initialized 128-bit floating-point vector of [2 x double]. The\n" |
| 16168 | "/// lower 64 bits contain the value of the parameter. The upper 64 bits are\n" |
| 16169 | "/// set to zero.\n" |
| 16170 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 16171 | "_mm_set_sd(double __w)\n" |
| 16172 | "{\n" |
| 16173 | " return __extension__ (__m128d){ __w, 0 };\n" |
| 16174 | "}\n" |
| 16175 | "\n" |
| 16176 | "/// Constructs a 128-bit floating-point vector of [2 x double], with each\n" |
| 16177 | "/// of the two double-precision floating-point vector elements set to the\n" |
| 16178 | "/// specified double-precision floating-point value.\n" |
| 16179 | "///\n" |
| 16180 | "/// \\headerfile <x86intrin.h>\n" |
| 16181 | "///\n" |
| 16182 | "/// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction.\n" |
| 16183 | "///\n" |
| 16184 | "/// \\param __w\n" |
| 16185 | "/// A double-precision floating-point value used to initialize each vector\n" |
| 16186 | "/// element of the result.\n" |
| 16187 | "/// \\returns An initialized 128-bit floating-point vector of [2 x double].\n" |
| 16188 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 16189 | "_mm_set1_pd(double __w)\n" |
| 16190 | "{\n" |
| 16191 | " return __extension__ (__m128d){ __w, __w };\n" |
| 16192 | "}\n" |
| 16193 | "\n" |
| 16194 | "/// Constructs a 128-bit floating-point vector of [2 x double], with each\n" |
| 16195 | "/// of the two double-precision floating-point vector elements set to the\n" |
| 16196 | "/// specified double-precision floating-point value.\n" |
| 16197 | "///\n" |
| 16198 | "/// \\headerfile <x86intrin.h>\n" |
| 16199 | "///\n" |
| 16200 | "/// This intrinsic corresponds to the <c> VMOVDDUP / MOVLHPS </c> instruction.\n" |
| 16201 | "///\n" |
| 16202 | "/// \\param __w\n" |
| 16203 | "/// A double-precision floating-point value used to initialize each vector\n" |
| 16204 | "/// element of the result.\n" |
| 16205 | "/// \\returns An initialized 128-bit floating-point vector of [2 x double].\n" |
| 16206 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 16207 | "_mm_set_pd1(double __w)\n" |
| 16208 | "{\n" |
| 16209 | " return _mm_set1_pd(__w);\n" |
| 16210 | "}\n" |
| 16211 | "\n" |
| 16212 | "/// Constructs a 128-bit floating-point vector of [2 x double]\n" |
| 16213 | "/// initialized with the specified double-precision floating-point values.\n" |
| 16214 | "///\n" |
| 16215 | "/// \\headerfile <x86intrin.h>\n" |
| 16216 | "///\n" |
| 16217 | "/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.\n" |
| 16218 | "///\n" |
| 16219 | "/// \\param __w\n" |
| 16220 | "/// A double-precision floating-point value used to initialize the upper 64\n" |
| 16221 | "/// bits of the result.\n" |
| 16222 | "/// \\param __x\n" |
| 16223 | "/// A double-precision floating-point value used to initialize the lower 64\n" |
| 16224 | "/// bits of the result.\n" |
| 16225 | "/// \\returns An initialized 128-bit floating-point vector of [2 x double].\n" |
| 16226 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 16227 | "_mm_set_pd(double __w, double __x)\n" |
| 16228 | "{\n" |
| 16229 | " return __extension__ (__m128d){ __x, __w };\n" |
| 16230 | "}\n" |
| 16231 | "\n" |
| 16232 | "/// Constructs a 128-bit floating-point vector of [2 x double],\n" |
| 16233 | "/// initialized in reverse order with the specified double-precision\n" |
| 16234 | "/// floating-point values.\n" |
| 16235 | "///\n" |
| 16236 | "/// \\headerfile <x86intrin.h>\n" |
| 16237 | "///\n" |
| 16238 | "/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.\n" |
| 16239 | "///\n" |
| 16240 | "/// \\param __w\n" |
| 16241 | "/// A double-precision floating-point value used to initialize the lower 64\n" |
| 16242 | "/// bits of the result.\n" |
| 16243 | "/// \\param __x\n" |
| 16244 | "/// A double-precision floating-point value used to initialize the upper 64\n" |
| 16245 | "/// bits of the result.\n" |
| 16246 | "/// \\returns An initialized 128-bit floating-point vector of [2 x double].\n" |
| 16247 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 16248 | "_mm_setr_pd(double __w, double __x)\n" |
| 16249 | "{\n" |
| 16250 | " return __extension__ (__m128d){ __w, __x };\n" |
| 16251 | "}\n" |
| 16252 | "\n" |
| 16253 | "/// Constructs a 128-bit floating-point vector of [2 x double]\n" |
| 16254 | "/// initialized to zero.\n" |
| 16255 | "///\n" |
| 16256 | "/// \\headerfile <x86intrin.h>\n" |
| 16257 | "///\n" |
| 16258 | "/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction.\n" |
| 16259 | "///\n" |
| 16260 | "/// \\returns An initialized 128-bit floating-point vector of [2 x double] with\n" |
| 16261 | "/// all elements set to zero.\n" |
| 16262 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 16263 | "_mm_setzero_pd(void)\n" |
| 16264 | "{\n" |
| 16265 | " return __extension__ (__m128d){ 0, 0 };\n" |
| 16266 | "}\n" |
| 16267 | "\n" |
| 16268 | "/// Constructs a 128-bit floating-point vector of [2 x double]. The lower\n" |
| 16269 | "/// 64 bits are set to the lower 64 bits of the second parameter. The upper\n" |
| 16270 | "/// 64 bits are set to the upper 64 bits of the first parameter.\n" |
| 16271 | "///\n" |
| 16272 | "/// \\headerfile <x86intrin.h>\n" |
| 16273 | "///\n" |
| 16274 | "/// This intrinsic corresponds to the <c> VBLENDPD / BLENDPD </c> instruction.\n" |
| 16275 | "///\n" |
| 16276 | "/// \\param __a\n" |
| 16277 | "/// A 128-bit vector of [2 x double]. The upper 64 bits are written to the\n" |
| 16278 | "/// upper 64 bits of the result.\n" |
| 16279 | "/// \\param __b\n" |
| 16280 | "/// A 128-bit vector of [2 x double]. The lower 64 bits are written to the\n" |
| 16281 | "/// lower 64 bits of the result.\n" |
| 16282 | "/// \\returns A 128-bit vector of [2 x double] containing the moved values.\n" |
| 16283 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 16284 | "_mm_move_sd(__m128d __a, __m128d __b)\n" |
| 16285 | "{\n" |
| 16286 | " __a[0] = __b[0];\n" |
| 16287 | " return __a;\n" |
| 16288 | "}\n" |
| 16289 | "\n" |
| 16290 | "/// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a\n" |
| 16291 | "/// memory location.\n" |
| 16292 | "///\n" |
| 16293 | "/// \\headerfile <x86intrin.h>\n" |
| 16294 | "///\n" |
| 16295 | "/// This intrinsic corresponds to the <c> VMOVSD / MOVSD </c> instruction.\n" |
| 16296 | "///\n" |
| 16297 | "/// \\param __dp\n" |
| 16298 | "/// A pointer to a 64-bit memory location.\n" |
| 16299 | "/// \\param __a\n" |
| 16300 | "/// A 128-bit vector of [2 x double] containing the value to be stored.\n" |
| 16301 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 16302 | "_mm_store_sd(double *__dp, __m128d __a)\n" |
| 16303 | "{\n" |
| 16304 | " struct __mm_store_sd_struct {\n" |
| 16305 | " double __u;\n" |
| 16306 | " } __attribute__((__packed__, __may_alias__));\n" |
| 16307 | " ((struct __mm_store_sd_struct*)__dp)->__u = __a[0];\n" |
| 16308 | "}\n" |
| 16309 | "\n" |
| 16310 | "/// Moves packed double-precision values from a 128-bit vector of\n" |
| 16311 | "/// [2 x double] to a memory location.\n" |
| 16312 | "///\n" |
| 16313 | "/// \\headerfile <x86intrin.h>\n" |
| 16314 | "///\n" |
| 16315 | "/// This intrinsic corresponds to the <c>VMOVAPD / MOVAPS</c> instruction.\n" |
| 16316 | "///\n" |
| 16317 | "/// \\param __dp\n" |
| 16318 | "/// A pointer to an aligned memory location that can store two\n" |
| 16319 | "/// double-precision values.\n" |
| 16320 | "/// \\param __a\n" |
| 16321 | "/// A packed 128-bit vector of [2 x double] containing the values to be\n" |
| 16322 | "/// moved.\n" |
| 16323 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 16324 | "_mm_store_pd(double *__dp, __m128d __a)\n" |
| 16325 | "{\n" |
| 16326 | " *(__m128d*)__dp = __a;\n" |
| 16327 | "}\n" |
| 16328 | "\n" |
| 16329 | "/// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to\n" |
| 16330 | "/// the upper and lower 64 bits of a memory location.\n" |
| 16331 | "///\n" |
| 16332 | "/// \\headerfile <x86intrin.h>\n" |
| 16333 | "///\n" |
| 16334 | "/// This intrinsic corresponds to the\n" |
| 16335 | "/// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction.\n" |
| 16336 | "///\n" |
| 16337 | "/// \\param __dp\n" |
| 16338 | "/// A pointer to a memory location that can store two double-precision\n" |
| 16339 | "/// values.\n" |
| 16340 | "/// \\param __a\n" |
| 16341 | "/// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each\n" |
| 16342 | "/// of the values in \\a __dp.\n" |
| 16343 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 16344 | "_mm_store1_pd(double *__dp, __m128d __a)\n" |
| 16345 | "{\n" |
| 16346 | " __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);\n" |
| 16347 | " _mm_store_pd(__dp, __a);\n" |
| 16348 | "}\n" |
| 16349 | "\n" |
| 16350 | "/// Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to\n" |
| 16351 | "/// the upper and lower 64 bits of a memory location.\n" |
| 16352 | "///\n" |
| 16353 | "/// \\headerfile <x86intrin.h>\n" |
| 16354 | "///\n" |
| 16355 | "/// This intrinsic corresponds to the\n" |
| 16356 | "/// <c> VMOVDDUP + VMOVAPD / MOVLHPS + MOVAPS </c> instruction.\n" |
| 16357 | "///\n" |
| 16358 | "/// \\param __dp\n" |
| 16359 | "/// A pointer to a memory location that can store two double-precision\n" |
| 16360 | "/// values.\n" |
| 16361 | "/// \\param __a\n" |
| 16362 | "/// A 128-bit vector of [2 x double] whose lower 64 bits are copied to each\n" |
| 16363 | "/// of the values in \\a __dp.\n" |
| 16364 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 16365 | "_mm_store_pd1(double *__dp, __m128d __a)\n" |
| 16366 | "{\n" |
| 16367 | " _mm_store1_pd(__dp, __a);\n" |
| 16368 | "}\n" |
| 16369 | "\n" |
| 16370 | "/// Stores a 128-bit vector of [2 x double] into an unaligned memory\n" |
| 16371 | "/// location.\n" |
| 16372 | "///\n" |
| 16373 | "/// \\headerfile <x86intrin.h>\n" |
| 16374 | "///\n" |
| 16375 | "/// This intrinsic corresponds to the <c> VMOVUPD / MOVUPD </c> instruction.\n" |
| 16376 | "///\n" |
| 16377 | "/// \\param __dp\n" |
| 16378 | "/// A pointer to a 128-bit memory location. The address of the memory\n" |
| 16379 | "/// location does not have to be aligned.\n" |
| 16380 | "/// \\param __a\n" |
| 16381 | "/// A 128-bit vector of [2 x double] containing the values to be stored.\n" |
| 16382 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 16383 | "_mm_storeu_pd(double *__dp, __m128d __a)\n" |
| 16384 | "{\n" |
| 16385 | " struct __storeu_pd {\n" |
| 16386 | " __m128d __v;\n" |
| 16387 | " } __attribute__((__packed__, __may_alias__));\n" |
| 16388 | " ((struct __storeu_pd*)__dp)->__v = __a;\n" |
| 16389 | "}\n" |
| 16390 | "\n" |
| 16391 | "/// Stores two double-precision values, in reverse order, from a 128-bit\n" |
| 16392 | "/// vector of [2 x double] to a 16-byte aligned memory location.\n" |
| 16393 | "///\n" |
| 16394 | "/// \\headerfile <x86intrin.h>\n" |
| 16395 | "///\n" |
| 16396 | "/// This intrinsic corresponds to a shuffling instruction followed by a\n" |
| 16397 | "/// <c> VMOVAPD / MOVAPD </c> instruction.\n" |
| 16398 | "///\n" |
| 16399 | "/// \\param __dp\n" |
| 16400 | "/// A pointer to a 16-byte aligned memory location that can store two\n" |
| 16401 | "/// double-precision values.\n" |
| 16402 | "/// \\param __a\n" |
| 16403 | "/// A 128-bit vector of [2 x double] containing the values to be reversed and\n" |
| 16404 | "/// stored.\n" |
| 16405 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 16406 | "_mm_storer_pd(double *__dp, __m128d __a)\n" |
| 16407 | "{\n" |
| 16408 | " __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 1, 0);\n" |
| 16409 | " *(__m128d *)__dp = __a;\n" |
| 16410 | "}\n" |
| 16411 | "\n" |
| 16412 | "/// Stores the upper 64 bits of a 128-bit vector of [2 x double] to a\n" |
| 16413 | "/// memory location.\n" |
| 16414 | "///\n" |
| 16415 | "/// \\headerfile <x86intrin.h>\n" |
| 16416 | "///\n" |
| 16417 | "/// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction.\n" |
| 16418 | "///\n" |
| 16419 | "/// \\param __dp\n" |
| 16420 | "/// A pointer to a 64-bit memory location.\n" |
| 16421 | "/// \\param __a\n" |
| 16422 | "/// A 128-bit vector of [2 x double] containing the value to be stored.\n" |
| 16423 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 16424 | "_mm_storeh_pd(double *__dp, __m128d __a)\n" |
| 16425 | "{\n" |
| 16426 | " struct __mm_storeh_pd_struct {\n" |
| 16427 | " double __u;\n" |
| 16428 | " } __attribute__((__packed__, __may_alias__));\n" |
| 16429 | " ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[1];\n" |
| 16430 | "}\n" |
| 16431 | "\n" |
| 16432 | "/// Stores the lower 64 bits of a 128-bit vector of [2 x double] to a\n" |
| 16433 | "/// memory location.\n" |
| 16434 | "///\n" |
| 16435 | "/// \\headerfile <x86intrin.h>\n" |
| 16436 | "///\n" |
| 16437 | "/// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction.\n" |
| 16438 | "///\n" |
| 16439 | "/// \\param __dp\n" |
| 16440 | "/// A pointer to a 64-bit memory location.\n" |
| 16441 | "/// \\param __a\n" |
| 16442 | "/// A 128-bit vector of [2 x double] containing the value to be stored.\n" |
| 16443 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 16444 | "_mm_storel_pd(double *__dp, __m128d __a)\n" |
| 16445 | "{\n" |
| 16446 | " struct __mm_storeh_pd_struct {\n" |
| 16447 | " double __u;\n" |
| 16448 | " } __attribute__((__packed__, __may_alias__));\n" |
| 16449 | " ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[0];\n" |
| 16450 | "}\n" |
| 16451 | "\n" |
| 16452 | "/// Adds the corresponding elements of two 128-bit vectors of [16 x i8],\n" |
| 16453 | "/// saving the lower 8 bits of each sum in the corresponding element of a\n" |
| 16454 | "/// 128-bit result vector of [16 x i8].\n" |
| 16455 | "///\n" |
| 16456 | "/// The integer elements of both parameters can be either signed or unsigned.\n" |
| 16457 | "///\n" |
| 16458 | "/// \\headerfile <x86intrin.h>\n" |
| 16459 | "///\n" |
| 16460 | "/// This intrinsic corresponds to the <c> VPADDB / PADDB </c> instruction.\n" |
| 16461 | "///\n" |
| 16462 | "/// \\param __a\n" |
| 16463 | "/// A 128-bit vector of [16 x i8].\n" |
| 16464 | "/// \\param __b\n" |
| 16465 | "/// A 128-bit vector of [16 x i8].\n" |
| 16466 | "/// \\returns A 128-bit vector of [16 x i8] containing the sums of both\n" |
| 16467 | "/// parameters.\n" |
| 16468 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 16469 | "_mm_add_epi8(__m128i __a, __m128i __b)\n" |
| 16470 | "{\n" |
| 16471 | " return (__m128i)((__v16qu)__a + (__v16qu)__b);\n" |
| 16472 | "}\n" |
| 16473 | "\n" |
| 16474 | "/// Adds the corresponding elements of two 128-bit vectors of [8 x i16],\n" |
| 16475 | "/// saving the lower 16 bits of each sum in the corresponding element of a\n" |
| 16476 | "/// 128-bit result vector of [8 x i16].\n" |
| 16477 | "///\n" |
| 16478 | "/// The integer elements of both parameters can be either signed or unsigned.\n" |
| 16479 | "///\n" |
| 16480 | "/// \\headerfile <x86intrin.h>\n" |
| 16481 | "///\n" |
| 16482 | "/// This intrinsic corresponds to the <c> VPADDW / PADDW </c> instruction.\n" |
| 16483 | "///\n" |
| 16484 | "/// \\param __a\n" |
| 16485 | "/// A 128-bit vector of [8 x i16].\n" |
| 16486 | "/// \\param __b\n" |
| 16487 | "/// A 128-bit vector of [8 x i16].\n" |
| 16488 | "/// \\returns A 128-bit vector of [8 x i16] containing the sums of both\n" |
| 16489 | "/// parameters.\n" |
| 16490 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 16491 | "_mm_add_epi16(__m128i __a, __m128i __b)\n" |
| 16492 | "{\n" |
| 16493 | " return (__m128i)((__v8hu)__a + (__v8hu)__b);\n" |
| 16494 | "}\n" |
| 16495 | "\n" |
| 16496 | "/// Adds the corresponding elements of two 128-bit vectors of [4 x i32],\n" |
| 16497 | "/// saving the lower 32 bits of each sum in the corresponding element of a\n" |
| 16498 | "/// 128-bit result vector of [4 x i32].\n" |
| 16499 | "///\n" |
| 16500 | "/// The integer elements of both parameters can be either signed or unsigned.\n" |
| 16501 | "///\n" |
| 16502 | "/// \\headerfile <x86intrin.h>\n" |
| 16503 | "///\n" |
| 16504 | "/// This intrinsic corresponds to the <c> VPADDD / PADDD </c> instruction.\n" |
| 16505 | "///\n" |
| 16506 | "/// \\param __a\n" |
| 16507 | "/// A 128-bit vector of [4 x i32].\n" |
| 16508 | "/// \\param __b\n" |
| 16509 | "/// A 128-bit vector of [4 x i32].\n" |
| 16510 | "/// \\returns A 128-bit vector of [4 x i32] containing the sums of both\n" |
| 16511 | "/// parameters.\n" |
| 16512 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 16513 | "_mm_add_epi32(__m128i __a, __m128i __b)\n" |
| 16514 | "{\n" |
| 16515 | " return (__m128i)((__v4su)__a + (__v4su)__b);\n" |
| 16516 | "}\n" |
| 16517 | "\n" |
| 16518 | "/// Adds two signed or unsigned 64-bit integer values, returning the\n" |
| 16519 | "/// lower 64 bits of the sum.\n" |
| 16520 | "///\n" |
| 16521 | "/// \\headerfile <x86intrin.h>\n" |
| 16522 | "///\n" |
| 16523 | "/// This intrinsic corresponds to the <c> PADDQ </c> instruction.\n" |
| 16524 | "///\n" |
| 16525 | "/// \\param __a\n" |
| 16526 | "/// A 64-bit integer.\n" |
| 16527 | "/// \\param __b\n" |
| 16528 | "/// A 64-bit integer.\n" |
| 16529 | "/// \\returns A 64-bit integer containing the sum of both parameters.\n" |
| 16530 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 16531 | "_mm_add_si64(__m64 __a, __m64 __b)\n" |
| 16532 | "{\n" |
| 16533 | " return (__m64)__builtin_ia32_paddq((__v1di)__a, (__v1di)__b);\n" |
| 16534 | "}\n" |
| 16535 | "\n" |
| 16536 | "/// Adds the corresponding elements of two 128-bit vectors of [2 x i64],\n" |
| 16537 | "/// saving the lower 64 bits of each sum in the corresponding element of a\n" |
| 16538 | "/// 128-bit result vector of [2 x i64].\n" |
| 16539 | "///\n" |
| 16540 | "/// The integer elements of both parameters can be either signed or unsigned.\n" |
| 16541 | "///\n" |
| 16542 | "/// \\headerfile <x86intrin.h>\n" |
| 16543 | "///\n" |
| 16544 | "/// This intrinsic corresponds to the <c> VPADDQ / PADDQ </c> instruction.\n" |
| 16545 | "///\n" |
| 16546 | "/// \\param __a\n" |
| 16547 | "/// A 128-bit vector of [2 x i64].\n" |
| 16548 | "/// \\param __b\n" |
| 16549 | "/// A 128-bit vector of [2 x i64].\n" |
| 16550 | "/// \\returns A 128-bit vector of [2 x i64] containing the sums of both\n" |
| 16551 | "/// parameters.\n" |
| 16552 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 16553 | "_mm_add_epi64(__m128i __a, __m128i __b)\n" |
| 16554 | "{\n" |
| 16555 | " return (__m128i)((__v2du)__a + (__v2du)__b);\n" |
| 16556 | "}\n" |
| 16557 | "\n" |
| 16558 | "/// Adds, with saturation, the corresponding elements of two 128-bit\n" |
| 16559 | "/// signed [16 x i8] vectors, saving each sum in the corresponding element of\n" |
| 16560 | "/// a 128-bit result vector of [16 x i8]. Positive sums greater than 0x7F are\n" |
| 16561 | "/// saturated to 0x7F. Negative sums less than 0x80 are saturated to 0x80.\n" |
| 16562 | "///\n" |
| 16563 | "/// \\headerfile <x86intrin.h>\n" |
| 16564 | "///\n" |
| 16565 | "/// This intrinsic corresponds to the <c> VPADDSB / PADDSB </c> instruction.\n" |
| 16566 | "///\n" |
| 16567 | "/// \\param __a\n" |
| 16568 | "/// A 128-bit signed [16 x i8] vector.\n" |
| 16569 | "/// \\param __b\n" |
| 16570 | "/// A 128-bit signed [16 x i8] vector.\n" |
| 16571 | "/// \\returns A 128-bit signed [16 x i8] vector containing the saturated sums of\n" |
| 16572 | "/// both parameters.\n" |
| 16573 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 16574 | "_mm_adds_epi8(__m128i __a, __m128i __b)\n" |
| 16575 | "{\n" |
| 16576 | " return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b);\n" |
| 16577 | "}\n" |
| 16578 | "\n" |
| 16579 | "/// Adds, with saturation, the corresponding elements of two 128-bit\n" |
| 16580 | "/// signed [8 x i16] vectors, saving each sum in the corresponding element of\n" |
| 16581 | "/// a 128-bit result vector of [8 x i16]. Positive sums greater than 0x7FFF\n" |
| 16582 | "/// are saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to\n" |
| 16583 | "/// 0x8000.\n" |
| 16584 | "///\n" |
| 16585 | "/// \\headerfile <x86intrin.h>\n" |
| 16586 | "///\n" |
| 16587 | "/// This intrinsic corresponds to the <c> VPADDSW / PADDSW </c> instruction.\n" |
| 16588 | "///\n" |
| 16589 | "/// \\param __a\n" |
| 16590 | "/// A 128-bit signed [8 x i16] vector.\n" |
| 16591 | "/// \\param __b\n" |
| 16592 | "/// A 128-bit signed [8 x i16] vector.\n" |
| 16593 | "/// \\returns A 128-bit signed [8 x i16] vector containing the saturated sums of\n" |
| 16594 | "/// both parameters.\n" |
| 16595 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 16596 | "_mm_adds_epi16(__m128i __a, __m128i __b)\n" |
| 16597 | "{\n" |
| 16598 | " return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b);\n" |
| 16599 | "}\n" |
| 16600 | "\n" |
| 16601 | "/// Adds, with saturation, the corresponding elements of two 128-bit\n" |
| 16602 | "/// unsigned [16 x i8] vectors, saving each sum in the corresponding element\n" |
| 16603 | "/// of a 128-bit result vector of [16 x i8]. Positive sums greater than 0xFF\n" |
| 16604 | "/// are saturated to 0xFF. Negative sums are saturated to 0x00.\n" |
| 16605 | "///\n" |
| 16606 | "/// \\headerfile <x86intrin.h>\n" |
| 16607 | "///\n" |
| 16608 | "/// This intrinsic corresponds to the <c> VPADDUSB / PADDUSB </c> instruction.\n" |
| 16609 | "///\n" |
| 16610 | "/// \\param __a\n" |
| 16611 | "/// A 128-bit unsigned [16 x i8] vector.\n" |
| 16612 | "/// \\param __b\n" |
| 16613 | "/// A 128-bit unsigned [16 x i8] vector.\n" |
| 16614 | "/// \\returns A 128-bit unsigned [16 x i8] vector containing the saturated sums\n" |
| 16615 | "/// of both parameters.\n" |
| 16616 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 16617 | "_mm_adds_epu8(__m128i __a, __m128i __b)\n" |
| 16618 | "{\n" |
| 16619 | " return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b);\n" |
| 16620 | "}\n" |
| 16621 | "\n" |
| 16622 | "/// Adds, with saturation, the corresponding elements of two 128-bit\n" |
| 16623 | "/// unsigned [8 x i16] vectors, saving each sum in the corresponding element\n" |
| 16624 | "/// of a 128-bit result vector of [8 x i16]. Positive sums greater than\n" |
| 16625 | "/// 0xFFFF are saturated to 0xFFFF. Negative sums are saturated to 0x0000.\n" |
| 16626 | "///\n" |
| 16627 | "/// \\headerfile <x86intrin.h>\n" |
| 16628 | "///\n" |
| 16629 | "/// This intrinsic corresponds to the <c> VPADDUSB / PADDUSB </c> instruction.\n" |
| 16630 | "///\n" |
| 16631 | "/// \\param __a\n" |
| 16632 | "/// A 128-bit unsigned [8 x i16] vector.\n" |
| 16633 | "/// \\param __b\n" |
| 16634 | "/// A 128-bit unsigned [8 x i16] vector.\n" |
| 16635 | "/// \\returns A 128-bit unsigned [8 x i16] vector containing the saturated sums\n" |
| 16636 | "/// of both parameters.\n" |
| 16637 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 16638 | "_mm_adds_epu16(__m128i __a, __m128i __b)\n" |
| 16639 | "{\n" |
| 16640 | " return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b);\n" |
| 16641 | "}\n" |
| 16642 | "\n" |
| 16643 | "/// Computes the rounded avarages of corresponding elements of two\n" |
| 16644 | "/// 128-bit unsigned [16 x i8] vectors, saving each result in the\n" |
| 16645 | "/// corresponding element of a 128-bit result vector of [16 x i8].\n" |
| 16646 | "///\n" |
| 16647 | "/// \\headerfile <x86intrin.h>\n" |
| 16648 | "///\n" |
| 16649 | "/// This intrinsic corresponds to the <c> VPAVGB / PAVGB </c> instruction.\n" |
| 16650 | "///\n" |
| 16651 | "/// \\param __a\n" |
| 16652 | "/// A 128-bit unsigned [16 x i8] vector.\n" |
| 16653 | "/// \\param __b\n" |
| 16654 | "/// A 128-bit unsigned [16 x i8] vector.\n" |
| 16655 | "/// \\returns A 128-bit unsigned [16 x i8] vector containing the rounded\n" |
| 16656 | "/// averages of both parameters.\n" |
| 16657 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 16658 | "_mm_avg_epu8(__m128i __a, __m128i __b)\n" |
| 16659 | "{\n" |
| 16660 | " typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32)));\n" |
| 16661 | " return (__m128i)__builtin_convertvector(\n" |
| 16662 | " ((__builtin_convertvector((__v16qu)__a, __v16hu) +\n" |
| 16663 | " __builtin_convertvector((__v16qu)__b, __v16hu)) + 1)\n" |
| 16664 | " >> 1, __v16qu);\n" |
| 16665 | "}\n" |
| 16666 | "\n" |
| 16667 | "/// Computes the rounded avarages of corresponding elements of two\n" |
| 16668 | "/// 128-bit unsigned [8 x i16] vectors, saving each result in the\n" |
| 16669 | "/// corresponding element of a 128-bit result vector of [8 x i16].\n" |
| 16670 | "///\n" |
| 16671 | "/// \\headerfile <x86intrin.h>\n" |
| 16672 | "///\n" |
| 16673 | "/// This intrinsic corresponds to the <c> VPAVGW / PAVGW </c> instruction.\n" |
| 16674 | "///\n" |
| 16675 | "/// \\param __a\n" |
| 16676 | "/// A 128-bit unsigned [8 x i16] vector.\n" |
| 16677 | "/// \\param __b\n" |
| 16678 | "/// A 128-bit unsigned [8 x i16] vector.\n" |
| 16679 | "/// \\returns A 128-bit unsigned [8 x i16] vector containing the rounded\n" |
| 16680 | "/// averages of both parameters.\n" |
| 16681 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 16682 | "_mm_avg_epu16(__m128i __a, __m128i __b)\n" |
| 16683 | "{\n" |
| 16684 | " typedef unsigned int __v8su __attribute__ ((__vector_size__ (32)));\n" |
| 16685 | " return (__m128i)__builtin_convertvector(\n" |
| 16686 | " ((__builtin_convertvector((__v8hu)__a, __v8su) +\n" |
| 16687 | " __builtin_convertvector((__v8hu)__b, __v8su)) + 1)\n" |
| 16688 | " >> 1, __v8hu);\n" |
| 16689 | "}\n" |
| 16690 | "\n" |
| 16691 | "/// Multiplies the corresponding elements of two 128-bit signed [8 x i16]\n" |
| 16692 | "/// vectors, producing eight intermediate 32-bit signed integer products, and\n" |
| 16693 | "/// adds the consecutive pairs of 32-bit products to form a 128-bit signed\n" |
| 16694 | "/// [4 x i32] vector.\n" |
| 16695 | "///\n" |
| 16696 | "/// For example, bits [15:0] of both parameters are multiplied producing a\n" |
| 16697 | "/// 32-bit product, bits [31:16] of both parameters are multiplied producing\n" |
| 16698 | "/// a 32-bit product, and the sum of those two products becomes bits [31:0]\n" |
| 16699 | "/// of the result.\n" |
| 16700 | "///\n" |
| 16701 | "/// \\headerfile <x86intrin.h>\n" |
| 16702 | "///\n" |
| 16703 | "/// This intrinsic corresponds to the <c> VPMADDWD / PMADDWD </c> instruction.\n" |
| 16704 | "///\n" |
| 16705 | "/// \\param __a\n" |
| 16706 | "/// A 128-bit signed [8 x i16] vector.\n" |
| 16707 | "/// \\param __b\n" |
| 16708 | "/// A 128-bit signed [8 x i16] vector.\n" |
| 16709 | "/// \\returns A 128-bit signed [4 x i32] vector containing the sums of products\n" |
| 16710 | "/// of both parameters.\n" |
| 16711 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 16712 | "_mm_madd_epi16(__m128i __a, __m128i __b)\n" |
| 16713 | "{\n" |
| 16714 | " return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b);\n" |
| 16715 | "}\n" |
| 16716 | "\n" |
| 16717 | "/// Compares corresponding elements of two 128-bit signed [8 x i16]\n" |
| 16718 | "/// vectors, saving the greater value from each comparison in the\n" |
| 16719 | "/// corresponding element of a 128-bit result vector of [8 x i16].\n" |
| 16720 | "///\n" |
| 16721 | "/// \\headerfile <x86intrin.h>\n" |
| 16722 | "///\n" |
| 16723 | "/// This intrinsic corresponds to the <c> VPMAXSW / PMAXSW </c> instruction.\n" |
| 16724 | "///\n" |
| 16725 | "/// \\param __a\n" |
| 16726 | "/// A 128-bit signed [8 x i16] vector.\n" |
| 16727 | "/// \\param __b\n" |
| 16728 | "/// A 128-bit signed [8 x i16] vector.\n" |
| 16729 | "/// \\returns A 128-bit signed [8 x i16] vector containing the greater value of\n" |
| 16730 | "/// each comparison.\n" |
| 16731 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 16732 | "_mm_max_epi16(__m128i __a, __m128i __b)\n" |
| 16733 | "{\n" |
| 16734 | " return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b);\n" |
| 16735 | "}\n" |
| 16736 | "\n" |
| 16737 | "/// Compares corresponding elements of two 128-bit unsigned [16 x i8]\n" |
| 16738 | "/// vectors, saving the greater value from each comparison in the\n" |
| 16739 | "/// corresponding element of a 128-bit result vector of [16 x i8].\n" |
| 16740 | "///\n" |
| 16741 | "/// \\headerfile <x86intrin.h>\n" |
| 16742 | "///\n" |
| 16743 | "/// This intrinsic corresponds to the <c> VPMAXUB / PMAXUB </c> instruction.\n" |
| 16744 | "///\n" |
| 16745 | "/// \\param __a\n" |
| 16746 | "/// A 128-bit unsigned [16 x i8] vector.\n" |
| 16747 | "/// \\param __b\n" |
| 16748 | "/// A 128-bit unsigned [16 x i8] vector.\n" |
| 16749 | "/// \\returns A 128-bit unsigned [16 x i8] vector containing the greater value of\n" |
| 16750 | "/// each comparison.\n" |
| 16751 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 16752 | "_mm_max_epu8(__m128i __a, __m128i __b)\n" |
| 16753 | "{\n" |
| 16754 | " return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b);\n" |
| 16755 | "}\n" |
| 16756 | "\n" |
| 16757 | "/// Compares corresponding elements of two 128-bit signed [8 x i16]\n" |
| 16758 | "/// vectors, saving the smaller value from each comparison in the\n" |
| 16759 | "/// corresponding element of a 128-bit result vector of [8 x i16].\n" |
| 16760 | "///\n" |
| 16761 | "/// \\headerfile <x86intrin.h>\n" |
| 16762 | "///\n" |
| 16763 | "/// This intrinsic corresponds to the <c> VPMINSW / PMINSW </c> instruction.\n" |
| 16764 | "///\n" |
| 16765 | "/// \\param __a\n" |
| 16766 | "/// A 128-bit signed [8 x i16] vector.\n" |
| 16767 | "/// \\param __b\n" |
| 16768 | "/// A 128-bit signed [8 x i16] vector.\n" |
| 16769 | "/// \\returns A 128-bit signed [8 x i16] vector containing the smaller value of\n" |
| 16770 | "/// each comparison.\n" |
| 16771 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 16772 | "_mm_min_epi16(__m128i __a, __m128i __b)\n" |
| 16773 | "{\n" |
| 16774 | " return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b);\n" |
| 16775 | "}\n" |
| 16776 | "\n" |
| 16777 | "/// Compares corresponding elements of two 128-bit unsigned [16 x i8]\n" |
| 16778 | "/// vectors, saving the smaller value from each comparison in the\n" |
| 16779 | "/// corresponding element of a 128-bit result vector of [16 x i8].\n" |
| 16780 | "///\n" |
| 16781 | "/// \\headerfile <x86intrin.h>\n" |
| 16782 | "///\n" |
| 16783 | "/// This intrinsic corresponds to the <c> VPMINUB / PMINUB </c> instruction.\n" |
| 16784 | "///\n" |
| 16785 | "/// \\param __a\n" |
| 16786 | "/// A 128-bit unsigned [16 x i8] vector.\n" |
| 16787 | "/// \\param __b\n" |
| 16788 | "/// A 128-bit unsigned [16 x i8] vector.\n" |
| 16789 | "/// \\returns A 128-bit unsigned [16 x i8] vector containing the smaller value of\n" |
| 16790 | "/// each comparison.\n" |
| 16791 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 16792 | "_mm_min_epu8(__m128i __a, __m128i __b)\n" |
| 16793 | "{\n" |
| 16794 | " return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b);\n" |
| 16795 | "}\n" |
| 16796 | "\n" |
| 16797 | "/// Multiplies the corresponding elements of two signed [8 x i16]\n" |
| 16798 | "/// vectors, saving the upper 16 bits of each 32-bit product in the\n" |
| 16799 | "/// corresponding element of a 128-bit signed [8 x i16] result vector.\n" |
| 16800 | "///\n" |
| 16801 | "/// \\headerfile <x86intrin.h>\n" |
| 16802 | "///\n" |
| 16803 | "/// This intrinsic corresponds to the <c> VPMULHW / PMULHW </c> instruction.\n" |
| 16804 | "///\n" |
| 16805 | "/// \\param __a\n" |
| 16806 | "/// A 128-bit signed [8 x i16] vector.\n" |
| 16807 | "/// \\param __b\n" |
| 16808 | "/// A 128-bit signed [8 x i16] vector.\n" |
| 16809 | "/// \\returns A 128-bit signed [8 x i16] vector containing the upper 16 bits of\n" |
| 16810 | "/// each of the eight 32-bit products.\n" |
| 16811 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 16812 | "_mm_mulhi_epi16(__m128i __a, __m128i __b)\n" |
| 16813 | "{\n" |
| 16814 | " return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b);\n" |
| 16815 | "}\n" |
| 16816 | "\n" |
| 16817 | "/// Multiplies the corresponding elements of two unsigned [8 x i16]\n" |
| 16818 | "/// vectors, saving the upper 16 bits of each 32-bit product in the\n" |
| 16819 | "/// corresponding element of a 128-bit unsigned [8 x i16] result vector.\n" |
| 16820 | "///\n" |
| 16821 | "/// \\headerfile <x86intrin.h>\n" |
| 16822 | "///\n" |
| 16823 | "/// This intrinsic corresponds to the <c> VPMULHUW / PMULHUW </c> instruction.\n" |
| 16824 | "///\n" |
| 16825 | "/// \\param __a\n" |
| 16826 | "/// A 128-bit unsigned [8 x i16] vector.\n" |
| 16827 | "/// \\param __b\n" |
| 16828 | "/// A 128-bit unsigned [8 x i16] vector.\n" |
| 16829 | "/// \\returns A 128-bit unsigned [8 x i16] vector containing the upper 16 bits\n" |
| 16830 | "/// of each of the eight 32-bit products.\n" |
| 16831 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 16832 | "_mm_mulhi_epu16(__m128i __a, __m128i __b)\n" |
| 16833 | "{\n" |
| 16834 | " return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b);\n" |
| 16835 | "}\n" |
| 16836 | "\n" |
| 16837 | "/// Multiplies the corresponding elements of two signed [8 x i16]\n" |
| 16838 | "/// vectors, saving the lower 16 bits of each 32-bit product in the\n" |
| 16839 | "/// corresponding element of a 128-bit signed [8 x i16] result vector.\n" |
| 16840 | "///\n" |
| 16841 | "/// \\headerfile <x86intrin.h>\n" |
| 16842 | "///\n" |
| 16843 | "/// This intrinsic corresponds to the <c> VPMULLW / PMULLW </c> instruction.\n" |
| 16844 | "///\n" |
| 16845 | "/// \\param __a\n" |
| 16846 | "/// A 128-bit signed [8 x i16] vector.\n" |
| 16847 | "/// \\param __b\n" |
| 16848 | "/// A 128-bit signed [8 x i16] vector.\n" |
| 16849 | "/// \\returns A 128-bit signed [8 x i16] vector containing the lower 16 bits of\n" |
| 16850 | "/// each of the eight 32-bit products.\n" |
| 16851 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 16852 | "_mm_mullo_epi16(__m128i __a, __m128i __b)\n" |
| 16853 | "{\n" |
| 16854 | " return (__m128i)((__v8hu)__a * (__v8hu)__b);\n" |
| 16855 | "}\n" |
| 16856 | "\n" |
| 16857 | "/// Multiplies 32-bit unsigned integer values contained in the lower bits\n" |
| 16858 | "/// of the two 64-bit integer vectors and returns the 64-bit unsigned\n" |
| 16859 | "/// product.\n" |
| 16860 | "///\n" |
| 16861 | "/// \\headerfile <x86intrin.h>\n" |
| 16862 | "///\n" |
| 16863 | "/// This intrinsic corresponds to the <c> PMULUDQ </c> instruction.\n" |
| 16864 | "///\n" |
| 16865 | "/// \\param __a\n" |
| 16866 | "/// A 64-bit integer containing one of the source operands.\n" |
| 16867 | "/// \\param __b\n" |
| 16868 | "/// A 64-bit integer containing one of the source operands.\n" |
| 16869 | "/// \\returns A 64-bit integer vector containing the product of both operands.\n" |
| 16870 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 16871 | "_mm_mul_su32(__m64 __a, __m64 __b)\n" |
| 16872 | "{\n" |
| 16873 | " return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b);\n" |
| 16874 | "}\n" |
| 16875 | "\n" |
| 16876 | "/// Multiplies 32-bit unsigned integer values contained in the lower\n" |
| 16877 | "/// bits of the corresponding elements of two [2 x i64] vectors, and returns\n" |
| 16878 | "/// the 64-bit products in the corresponding elements of a [2 x i64] vector.\n" |
| 16879 | "///\n" |
| 16880 | "/// \\headerfile <x86intrin.h>\n" |
| 16881 | "///\n" |
| 16882 | "/// This intrinsic corresponds to the <c> VPMULUDQ / PMULUDQ </c> instruction.\n" |
| 16883 | "///\n" |
| 16884 | "/// \\param __a\n" |
| 16885 | "/// A [2 x i64] vector containing one of the source operands.\n" |
| 16886 | "/// \\param __b\n" |
| 16887 | "/// A [2 x i64] vector containing one of the source operands.\n" |
| 16888 | "/// \\returns A [2 x i64] vector containing the product of both operands.\n" |
| 16889 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 16890 | "_mm_mul_epu32(__m128i __a, __m128i __b)\n" |
| 16891 | "{\n" |
| 16892 | " return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);\n" |
| 16893 | "}\n" |
| 16894 | "\n" |
| 16895 | "/// Computes the absolute differences of corresponding 8-bit integer\n" |
| 16896 | "/// values in two 128-bit vectors. Sums the first 8 absolute differences, and\n" |
| 16897 | "/// separately sums the second 8 absolute differences. Packs these two\n" |
| 16898 | "/// unsigned 16-bit integer sums into the upper and lower elements of a\n" |
| 16899 | "/// [2 x i64] vector.\n" |
| 16900 | "///\n" |
| 16901 | "/// \\headerfile <x86intrin.h>\n" |
| 16902 | "///\n" |
| 16903 | "/// This intrinsic corresponds to the <c> VPSADBW / PSADBW </c> instruction.\n" |
| 16904 | "///\n" |
| 16905 | "/// \\param __a\n" |
| 16906 | "/// A 128-bit integer vector containing one of the source operands.\n" |
| 16907 | "/// \\param __b\n" |
| 16908 | "/// A 128-bit integer vector containing one of the source operands.\n" |
| 16909 | "/// \\returns A [2 x i64] vector containing the sums of the sets of absolute\n" |
| 16910 | "/// differences between both operands.\n" |
| 16911 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 16912 | "_mm_sad_epu8(__m128i __a, __m128i __b)\n" |
| 16913 | "{\n" |
| 16914 | " return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b);\n" |
| 16915 | "}\n" |
| 16916 | "\n" |
| 16917 | "/// Subtracts the corresponding 8-bit integer values in the operands.\n" |
| 16918 | "///\n" |
| 16919 | "/// \\headerfile <x86intrin.h>\n" |
| 16920 | "///\n" |
| 16921 | "/// This intrinsic corresponds to the <c> VPSUBB / PSUBB </c> instruction.\n" |
| 16922 | "///\n" |
| 16923 | "/// \\param __a\n" |
| 16924 | "/// A 128-bit integer vector containing the minuends.\n" |
| 16925 | "/// \\param __b\n" |
| 16926 | "/// A 128-bit integer vector containing the subtrahends.\n" |
| 16927 | "/// \\returns A 128-bit integer vector containing the differences of the values\n" |
| 16928 | "/// in the operands.\n" |
| 16929 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 16930 | "_mm_sub_epi8(__m128i __a, __m128i __b)\n" |
| 16931 | "{\n" |
| 16932 | " return (__m128i)((__v16qu)__a - (__v16qu)__b);\n" |
| 16933 | "}\n" |
| 16934 | "\n" |
| 16935 | "/// Subtracts the corresponding 16-bit integer values in the operands.\n" |
| 16936 | "///\n" |
| 16937 | "/// \\headerfile <x86intrin.h>\n" |
| 16938 | "///\n" |
| 16939 | "/// This intrinsic corresponds to the <c> VPSUBW / PSUBW </c> instruction.\n" |
| 16940 | "///\n" |
| 16941 | "/// \\param __a\n" |
| 16942 | "/// A 128-bit integer vector containing the minuends.\n" |
| 16943 | "/// \\param __b\n" |
| 16944 | "/// A 128-bit integer vector containing the subtrahends.\n" |
| 16945 | "/// \\returns A 128-bit integer vector containing the differences of the values\n" |
| 16946 | "/// in the operands.\n" |
| 16947 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 16948 | "_mm_sub_epi16(__m128i __a, __m128i __b)\n" |
| 16949 | "{\n" |
| 16950 | " return (__m128i)((__v8hu)__a - (__v8hu)__b);\n" |
| 16951 | "}\n" |
| 16952 | "\n" |
| 16953 | "/// Subtracts the corresponding 32-bit integer values in the operands.\n" |
| 16954 | "///\n" |
| 16955 | "/// \\headerfile <x86intrin.h>\n" |
| 16956 | "///\n" |
| 16957 | "/// This intrinsic corresponds to the <c> VPSUBD / PSUBD </c> instruction.\n" |
| 16958 | "///\n" |
| 16959 | "/// \\param __a\n" |
| 16960 | "/// A 128-bit integer vector containing the minuends.\n" |
| 16961 | "/// \\param __b\n" |
| 16962 | "/// A 128-bit integer vector containing the subtrahends.\n" |
| 16963 | "/// \\returns A 128-bit integer vector containing the differences of the values\n" |
| 16964 | "/// in the operands.\n" |
| 16965 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 16966 | "_mm_sub_epi32(__m128i __a, __m128i __b)\n" |
| 16967 | "{\n" |
| 16968 | " return (__m128i)((__v4su)__a - (__v4su)__b);\n" |
| 16969 | "}\n" |
| 16970 | "\n" |
| 16971 | "/// Subtracts signed or unsigned 64-bit integer values and writes the\n" |
| 16972 | "/// difference to the corresponding bits in the destination.\n" |
| 16973 | "///\n" |
| 16974 | "/// \\headerfile <x86intrin.h>\n" |
| 16975 | "///\n" |
| 16976 | "/// This intrinsic corresponds to the <c> PSUBQ </c> instruction.\n" |
| 16977 | "///\n" |
| 16978 | "/// \\param __a\n" |
| 16979 | "/// A 64-bit integer vector containing the minuend.\n" |
| 16980 | "/// \\param __b\n" |
| 16981 | "/// A 64-bit integer vector containing the subtrahend.\n" |
| 16982 | "/// \\returns A 64-bit integer vector containing the difference of the values in\n" |
| 16983 | "/// the operands.\n" |
| 16984 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 16985 | "_mm_sub_si64(__m64 __a, __m64 __b)\n" |
| 16986 | "{\n" |
| 16987 | " return (__m64)__builtin_ia32_psubq((__v1di)__a, (__v1di)__b);\n" |
| 16988 | "}\n" |
| 16989 | "\n" |
| 16990 | "/// Subtracts the corresponding elements of two [2 x i64] vectors.\n" |
| 16991 | "///\n" |
| 16992 | "/// \\headerfile <x86intrin.h>\n" |
| 16993 | "///\n" |
| 16994 | "/// This intrinsic corresponds to the <c> VPSUBQ / PSUBQ </c> instruction.\n" |
| 16995 | "///\n" |
| 16996 | "/// \\param __a\n" |
| 16997 | "/// A 128-bit integer vector containing the minuends.\n" |
| 16998 | "/// \\param __b\n" |
| 16999 | "/// A 128-bit integer vector containing the subtrahends.\n" |
| 17000 | "/// \\returns A 128-bit integer vector containing the differences of the values\n" |
| 17001 | "/// in the operands.\n" |
| 17002 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17003 | "_mm_sub_epi64(__m128i __a, __m128i __b)\n" |
| 17004 | "{\n" |
| 17005 | " return (__m128i)((__v2du)__a - (__v2du)__b);\n" |
| 17006 | "}\n" |
| 17007 | "\n" |
| 17008 | "/// Subtracts corresponding 8-bit signed integer values in the input and\n" |
| 17009 | "/// returns the differences in the corresponding bytes in the destination.\n" |
| 17010 | "/// Differences greater than 0x7F are saturated to 0x7F, and differences less\n" |
| 17011 | "/// than 0x80 are saturated to 0x80.\n" |
| 17012 | "///\n" |
| 17013 | "/// \\headerfile <x86intrin.h>\n" |
| 17014 | "///\n" |
| 17015 | "/// This intrinsic corresponds to the <c> VPSUBSB / PSUBSB </c> instruction.\n" |
| 17016 | "///\n" |
| 17017 | "/// \\param __a\n" |
| 17018 | "/// A 128-bit integer vector containing the minuends.\n" |
| 17019 | "/// \\param __b\n" |
| 17020 | "/// A 128-bit integer vector containing the subtrahends.\n" |
| 17021 | "/// \\returns A 128-bit integer vector containing the differences of the values\n" |
| 17022 | "/// in the operands.\n" |
| 17023 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17024 | "_mm_subs_epi8(__m128i __a, __m128i __b)\n" |
| 17025 | "{\n" |
| 17026 | " return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b);\n" |
| 17027 | "}\n" |
| 17028 | "\n" |
| 17029 | "/// Subtracts corresponding 16-bit signed integer values in the input and\n" |
| 17030 | "/// returns the differences in the corresponding bytes in the destination.\n" |
| 17031 | "/// Differences greater than 0x7FFF are saturated to 0x7FFF, and values less\n" |
| 17032 | "/// than 0x8000 are saturated to 0x8000.\n" |
| 17033 | "///\n" |
| 17034 | "/// \\headerfile <x86intrin.h>\n" |
| 17035 | "///\n" |
| 17036 | "/// This intrinsic corresponds to the <c> VPSUBSW / PSUBSW </c> instruction.\n" |
| 17037 | "///\n" |
| 17038 | "/// \\param __a\n" |
| 17039 | "/// A 128-bit integer vector containing the minuends.\n" |
| 17040 | "/// \\param __b\n" |
| 17041 | "/// A 128-bit integer vector containing the subtrahends.\n" |
| 17042 | "/// \\returns A 128-bit integer vector containing the differences of the values\n" |
| 17043 | "/// in the operands.\n" |
| 17044 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17045 | "_mm_subs_epi16(__m128i __a, __m128i __b)\n" |
| 17046 | "{\n" |
| 17047 | " return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b);\n" |
| 17048 | "}\n" |
| 17049 | "\n" |
| 17050 | "/// Subtracts corresponding 8-bit unsigned integer values in the input\n" |
| 17051 | "/// and returns the differences in the corresponding bytes in the\n" |
| 17052 | "/// destination. Differences less than 0x00 are saturated to 0x00.\n" |
| 17053 | "///\n" |
| 17054 | "/// \\headerfile <x86intrin.h>\n" |
| 17055 | "///\n" |
| 17056 | "/// This intrinsic corresponds to the <c> VPSUBUSB / PSUBUSB </c> instruction.\n" |
| 17057 | "///\n" |
| 17058 | "/// \\param __a\n" |
| 17059 | "/// A 128-bit integer vector containing the minuends.\n" |
| 17060 | "/// \\param __b\n" |
| 17061 | "/// A 128-bit integer vector containing the subtrahends.\n" |
| 17062 | "/// \\returns A 128-bit integer vector containing the unsigned integer\n" |
| 17063 | "/// differences of the values in the operands.\n" |
| 17064 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17065 | "_mm_subs_epu8(__m128i __a, __m128i __b)\n" |
| 17066 | "{\n" |
| 17067 | " return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b);\n" |
| 17068 | "}\n" |
| 17069 | "\n" |
| 17070 | "/// Subtracts corresponding 16-bit unsigned integer values in the input\n" |
| 17071 | "/// and returns the differences in the corresponding bytes in the\n" |
| 17072 | "/// destination. Differences less than 0x0000 are saturated to 0x0000.\n" |
| 17073 | "///\n" |
| 17074 | "/// \\headerfile <x86intrin.h>\n" |
| 17075 | "///\n" |
| 17076 | "/// This intrinsic corresponds to the <c> VPSUBUSW / PSUBUSW </c> instruction.\n" |
| 17077 | "///\n" |
| 17078 | "/// \\param __a\n" |
| 17079 | "/// A 128-bit integer vector containing the minuends.\n" |
| 17080 | "/// \\param __b\n" |
| 17081 | "/// A 128-bit integer vector containing the subtrahends.\n" |
| 17082 | "/// \\returns A 128-bit integer vector containing the unsigned integer\n" |
| 17083 | "/// differences of the values in the operands.\n" |
| 17084 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17085 | "_mm_subs_epu16(__m128i __a, __m128i __b)\n" |
| 17086 | "{\n" |
| 17087 | " return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b);\n" |
| 17088 | "}\n" |
| 17089 | "\n" |
| 17090 | "/// Performs a bitwise AND of two 128-bit integer vectors.\n" |
| 17091 | "///\n" |
| 17092 | "/// \\headerfile <x86intrin.h>\n" |
| 17093 | "///\n" |
| 17094 | "/// This intrinsic corresponds to the <c> VPAND / PAND </c> instruction.\n" |
| 17095 | "///\n" |
| 17096 | "/// \\param __a\n" |
| 17097 | "/// A 128-bit integer vector containing one of the source operands.\n" |
| 17098 | "/// \\param __b\n" |
| 17099 | "/// A 128-bit integer vector containing one of the source operands.\n" |
| 17100 | "/// \\returns A 128-bit integer vector containing the bitwise AND of the values\n" |
| 17101 | "/// in both operands.\n" |
| 17102 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17103 | "_mm_and_si128(__m128i __a, __m128i __b)\n" |
| 17104 | "{\n" |
| 17105 | " return (__m128i)((__v2du)__a & (__v2du)__b);\n" |
| 17106 | "}\n" |
| 17107 | "\n" |
| 17108 | "/// Performs a bitwise AND of two 128-bit integer vectors, using the\n" |
| 17109 | "/// one's complement of the values contained in the first source operand.\n" |
| 17110 | "///\n" |
| 17111 | "/// \\headerfile <x86intrin.h>\n" |
| 17112 | "///\n" |
| 17113 | "/// This intrinsic corresponds to the <c> VPANDN / PANDN </c> instruction.\n" |
| 17114 | "///\n" |
| 17115 | "/// \\param __a\n" |
| 17116 | "/// A 128-bit vector containing the left source operand. The one's complement\n" |
| 17117 | "/// of this value is used in the bitwise AND.\n" |
| 17118 | "/// \\param __b\n" |
| 17119 | "/// A 128-bit vector containing the right source operand.\n" |
| 17120 | "/// \\returns A 128-bit integer vector containing the bitwise AND of the one's\n" |
| 17121 | "/// complement of the first operand and the values in the second operand.\n" |
| 17122 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17123 | "_mm_andnot_si128(__m128i __a, __m128i __b)\n" |
| 17124 | "{\n" |
| 17125 | " return (__m128i)(~(__v2du)__a & (__v2du)__b);\n" |
| 17126 | "}\n" |
| 17127 | "/// Performs a bitwise OR of two 128-bit integer vectors.\n" |
| 17128 | "///\n" |
| 17129 | "/// \\headerfile <x86intrin.h>\n" |
| 17130 | "///\n" |
| 17131 | "/// This intrinsic corresponds to the <c> VPOR / POR </c> instruction.\n" |
| 17132 | "///\n" |
| 17133 | "/// \\param __a\n" |
| 17134 | "/// A 128-bit integer vector containing one of the source operands.\n" |
| 17135 | "/// \\param __b\n" |
| 17136 | "/// A 128-bit integer vector containing one of the source operands.\n" |
| 17137 | "/// \\returns A 128-bit integer vector containing the bitwise OR of the values\n" |
| 17138 | "/// in both operands.\n" |
| 17139 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17140 | "_mm_or_si128(__m128i __a, __m128i __b)\n" |
| 17141 | "{\n" |
| 17142 | " return (__m128i)((__v2du)__a | (__v2du)__b);\n" |
| 17143 | "}\n" |
| 17144 | "\n" |
| 17145 | "/// Performs a bitwise exclusive OR of two 128-bit integer vectors.\n" |
| 17146 | "///\n" |
| 17147 | "/// \\headerfile <x86intrin.h>\n" |
| 17148 | "///\n" |
| 17149 | "/// This intrinsic corresponds to the <c> VPXOR / PXOR </c> instruction.\n" |
| 17150 | "///\n" |
| 17151 | "/// \\param __a\n" |
| 17152 | "/// A 128-bit integer vector containing one of the source operands.\n" |
| 17153 | "/// \\param __b\n" |
| 17154 | "/// A 128-bit integer vector containing one of the source operands.\n" |
| 17155 | "/// \\returns A 128-bit integer vector containing the bitwise exclusive OR of the\n" |
| 17156 | "/// values in both operands.\n" |
| 17157 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17158 | "_mm_xor_si128(__m128i __a, __m128i __b)\n" |
| 17159 | "{\n" |
| 17160 | " return (__m128i)((__v2du)__a ^ (__v2du)__b);\n" |
| 17161 | "}\n" |
| 17162 | "\n" |
| 17163 | "/// Left-shifts the 128-bit integer vector operand by the specified\n" |
| 17164 | "/// number of bytes. Low-order bits are cleared.\n" |
| 17165 | "///\n" |
| 17166 | "/// \\headerfile <x86intrin.h>\n" |
| 17167 | "///\n" |
| 17168 | "/// \\code\n" |
| 17169 | "/// __m128i _mm_slli_si128(__m128i a, const int imm);\n" |
| 17170 | "/// \\endcode\n" |
| 17171 | "///\n" |
| 17172 | "/// This intrinsic corresponds to the <c> VPSLLDQ / PSLLDQ </c> instruction.\n" |
| 17173 | "///\n" |
| 17174 | "/// \\param a\n" |
| 17175 | "/// A 128-bit integer vector containing the source operand.\n" |
| 17176 | "/// \\param imm\n" |
| 17177 | "/// An immediate value specifying the number of bytes to left-shift operand\n" |
| 17178 | "/// \\a a.\n" |
| 17179 | "/// \\returns A 128-bit integer vector containing the left-shifted value.\n" |
| 17180 | "#define _mm_slli_si128(a, imm) \\\n" |
| 17181 | " (__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))\n" |
| 17182 | "\n" |
| 17183 | "#define _mm_bslli_si128(a, imm) \\\n" |
| 17184 | " (__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))\n" |
| 17185 | "\n" |
| 17186 | "/// Left-shifts each 16-bit value in the 128-bit integer vector operand\n" |
| 17187 | "/// by the specified number of bits. Low-order bits are cleared.\n" |
| 17188 | "///\n" |
| 17189 | "/// \\headerfile <x86intrin.h>\n" |
| 17190 | "///\n" |
| 17191 | "/// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction.\n" |
| 17192 | "///\n" |
| 17193 | "/// \\param __a\n" |
| 17194 | "/// A 128-bit integer vector containing the source operand.\n" |
| 17195 | "/// \\param __count\n" |
| 17196 | "/// An integer value specifying the number of bits to left-shift each value\n" |
| 17197 | "/// in operand \\a __a.\n" |
| 17198 | "/// \\returns A 128-bit integer vector containing the left-shifted values.\n" |
| 17199 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17200 | "_mm_slli_epi16(__m128i __a, int __count)\n" |
| 17201 | "{\n" |
| 17202 | " return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count);\n" |
| 17203 | "}\n" |
| 17204 | "\n" |
| 17205 | "/// Left-shifts each 16-bit value in the 128-bit integer vector operand\n" |
| 17206 | "/// by the specified number of bits. Low-order bits are cleared.\n" |
| 17207 | "///\n" |
| 17208 | "/// \\headerfile <x86intrin.h>\n" |
| 17209 | "///\n" |
| 17210 | "/// This intrinsic corresponds to the <c> VPSLLW / PSLLW </c> instruction.\n" |
| 17211 | "///\n" |
| 17212 | "/// \\param __a\n" |
| 17213 | "/// A 128-bit integer vector containing the source operand.\n" |
| 17214 | "/// \\param __count\n" |
| 17215 | "/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n" |
| 17216 | "/// to left-shift each value in operand \\a __a.\n" |
| 17217 | "/// \\returns A 128-bit integer vector containing the left-shifted values.\n" |
| 17218 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17219 | "_mm_sll_epi16(__m128i __a, __m128i __count)\n" |
| 17220 | "{\n" |
| 17221 | " return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count);\n" |
| 17222 | "}\n" |
| 17223 | "\n" |
| 17224 | "/// Left-shifts each 32-bit value in the 128-bit integer vector operand\n" |
| 17225 | "/// by the specified number of bits. Low-order bits are cleared.\n" |
| 17226 | "///\n" |
| 17227 | "/// \\headerfile <x86intrin.h>\n" |
| 17228 | "///\n" |
| 17229 | "/// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction.\n" |
| 17230 | "///\n" |
| 17231 | "/// \\param __a\n" |
| 17232 | "/// A 128-bit integer vector containing the source operand.\n" |
| 17233 | "/// \\param __count\n" |
| 17234 | "/// An integer value specifying the number of bits to left-shift each value\n" |
| 17235 | "/// in operand \\a __a.\n" |
| 17236 | "/// \\returns A 128-bit integer vector containing the left-shifted values.\n" |
| 17237 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17238 | "_mm_slli_epi32(__m128i __a, int __count)\n" |
| 17239 | "{\n" |
| 17240 | " return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count);\n" |
| 17241 | "}\n" |
| 17242 | "\n" |
| 17243 | "/// Left-shifts each 32-bit value in the 128-bit integer vector operand\n" |
| 17244 | "/// by the specified number of bits. Low-order bits are cleared.\n" |
| 17245 | "///\n" |
| 17246 | "/// \\headerfile <x86intrin.h>\n" |
| 17247 | "///\n" |
| 17248 | "/// This intrinsic corresponds to the <c> VPSLLD / PSLLD </c> instruction.\n" |
| 17249 | "///\n" |
| 17250 | "/// \\param __a\n" |
| 17251 | "/// A 128-bit integer vector containing the source operand.\n" |
| 17252 | "/// \\param __count\n" |
| 17253 | "/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n" |
| 17254 | "/// to left-shift each value in operand \\a __a.\n" |
| 17255 | "/// \\returns A 128-bit integer vector containing the left-shifted values.\n" |
| 17256 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17257 | "_mm_sll_epi32(__m128i __a, __m128i __count)\n" |
| 17258 | "{\n" |
| 17259 | " return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count);\n" |
| 17260 | "}\n" |
| 17261 | "\n" |
| 17262 | "/// Left-shifts each 64-bit value in the 128-bit integer vector operand\n" |
| 17263 | "/// by the specified number of bits. Low-order bits are cleared.\n" |
| 17264 | "///\n" |
| 17265 | "/// \\headerfile <x86intrin.h>\n" |
| 17266 | "///\n" |
| 17267 | "/// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction.\n" |
| 17268 | "///\n" |
| 17269 | "/// \\param __a\n" |
| 17270 | "/// A 128-bit integer vector containing the source operand.\n" |
| 17271 | "/// \\param __count\n" |
| 17272 | "/// An integer value specifying the number of bits to left-shift each value\n" |
| 17273 | "/// in operand \\a __a.\n" |
| 17274 | "/// \\returns A 128-bit integer vector containing the left-shifted values.\n" |
| 17275 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17276 | "_mm_slli_epi64(__m128i __a, int __count)\n" |
| 17277 | "{\n" |
| 17278 | " return __builtin_ia32_psllqi128((__v2di)__a, __count);\n" |
| 17279 | "}\n" |
| 17280 | "\n" |
| 17281 | "/// Left-shifts each 64-bit value in the 128-bit integer vector operand\n" |
| 17282 | "/// by the specified number of bits. Low-order bits are cleared.\n" |
| 17283 | "///\n" |
| 17284 | "/// \\headerfile <x86intrin.h>\n" |
| 17285 | "///\n" |
| 17286 | "/// This intrinsic corresponds to the <c> VPSLLQ / PSLLQ </c> instruction.\n" |
| 17287 | "///\n" |
| 17288 | "/// \\param __a\n" |
| 17289 | "/// A 128-bit integer vector containing the source operand.\n" |
| 17290 | "/// \\param __count\n" |
| 17291 | "/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n" |
| 17292 | "/// to left-shift each value in operand \\a __a.\n" |
| 17293 | "/// \\returns A 128-bit integer vector containing the left-shifted values.\n" |
| 17294 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17295 | "_mm_sll_epi64(__m128i __a, __m128i __count)\n" |
| 17296 | "{\n" |
| 17297 | " return __builtin_ia32_psllq128((__v2di)__a, (__v2di)__count);\n" |
| 17298 | "}\n" |
| 17299 | "\n" |
| 17300 | "/// Right-shifts each 16-bit value in the 128-bit integer vector operand\n" |
| 17301 | "/// by the specified number of bits. High-order bits are filled with the sign\n" |
| 17302 | "/// bit of the initial value.\n" |
| 17303 | "///\n" |
| 17304 | "/// \\headerfile <x86intrin.h>\n" |
| 17305 | "///\n" |
| 17306 | "/// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction.\n" |
| 17307 | "///\n" |
| 17308 | "/// \\param __a\n" |
| 17309 | "/// A 128-bit integer vector containing the source operand.\n" |
| 17310 | "/// \\param __count\n" |
| 17311 | "/// An integer value specifying the number of bits to right-shift each value\n" |
| 17312 | "/// in operand \\a __a.\n" |
| 17313 | "/// \\returns A 128-bit integer vector containing the right-shifted values.\n" |
| 17314 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17315 | "_mm_srai_epi16(__m128i __a, int __count)\n" |
| 17316 | "{\n" |
| 17317 | " return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count);\n" |
| 17318 | "}\n" |
| 17319 | "\n" |
| 17320 | "/// Right-shifts each 16-bit value in the 128-bit integer vector operand\n" |
| 17321 | "/// by the specified number of bits. High-order bits are filled with the sign\n" |
| 17322 | "/// bit of the initial value.\n" |
| 17323 | "///\n" |
| 17324 | "/// \\headerfile <x86intrin.h>\n" |
| 17325 | "///\n" |
| 17326 | "/// This intrinsic corresponds to the <c> VPSRAW / PSRAW </c> instruction.\n" |
| 17327 | "///\n" |
| 17328 | "/// \\param __a\n" |
| 17329 | "/// A 128-bit integer vector containing the source operand.\n" |
| 17330 | "/// \\param __count\n" |
| 17331 | "/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n" |
| 17332 | "/// to right-shift each value in operand \\a __a.\n" |
| 17333 | "/// \\returns A 128-bit integer vector containing the right-shifted values.\n" |
| 17334 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17335 | "_mm_sra_epi16(__m128i __a, __m128i __count)\n" |
| 17336 | "{\n" |
| 17337 | " return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count);\n" |
| 17338 | "}\n" |
| 17339 | "\n" |
| 17340 | "/// Right-shifts each 32-bit value in the 128-bit integer vector operand\n" |
| 17341 | "/// by the specified number of bits. High-order bits are filled with the sign\n" |
| 17342 | "/// bit of the initial value.\n" |
| 17343 | "///\n" |
| 17344 | "/// \\headerfile <x86intrin.h>\n" |
| 17345 | "///\n" |
| 17346 | "/// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction.\n" |
| 17347 | "///\n" |
| 17348 | "/// \\param __a\n" |
| 17349 | "/// A 128-bit integer vector containing the source operand.\n" |
| 17350 | "/// \\param __count\n" |
| 17351 | "/// An integer value specifying the number of bits to right-shift each value\n" |
| 17352 | "/// in operand \\a __a.\n" |
| 17353 | "/// \\returns A 128-bit integer vector containing the right-shifted values.\n" |
| 17354 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17355 | "_mm_srai_epi32(__m128i __a, int __count)\n" |
| 17356 | "{\n" |
| 17357 | " return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count);\n" |
| 17358 | "}\n" |
| 17359 | "\n" |
| 17360 | "/// Right-shifts each 32-bit value in the 128-bit integer vector operand\n" |
| 17361 | "/// by the specified number of bits. High-order bits are filled with the sign\n" |
| 17362 | "/// bit of the initial value.\n" |
| 17363 | "///\n" |
| 17364 | "/// \\headerfile <x86intrin.h>\n" |
| 17365 | "///\n" |
| 17366 | "/// This intrinsic corresponds to the <c> VPSRAD / PSRAD </c> instruction.\n" |
| 17367 | "///\n" |
| 17368 | "/// \\param __a\n" |
| 17369 | "/// A 128-bit integer vector containing the source operand.\n" |
| 17370 | "/// \\param __count\n" |
| 17371 | "/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n" |
| 17372 | "/// to right-shift each value in operand \\a __a.\n" |
| 17373 | "/// \\returns A 128-bit integer vector containing the right-shifted values.\n" |
| 17374 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17375 | "_mm_sra_epi32(__m128i __a, __m128i __count)\n" |
| 17376 | "{\n" |
| 17377 | " return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count);\n" |
| 17378 | "}\n" |
| 17379 | "\n" |
| 17380 | "/// Right-shifts the 128-bit integer vector operand by the specified\n" |
| 17381 | "/// number of bytes. High-order bits are cleared.\n" |
| 17382 | "///\n" |
| 17383 | "/// \\headerfile <x86intrin.h>\n" |
| 17384 | "///\n" |
| 17385 | "/// \\code\n" |
| 17386 | "/// __m128i _mm_srli_si128(__m128i a, const int imm);\n" |
| 17387 | "/// \\endcode\n" |
| 17388 | "///\n" |
| 17389 | "/// This intrinsic corresponds to the <c> VPSRLDQ / PSRLDQ </c> instruction.\n" |
| 17390 | "///\n" |
| 17391 | "/// \\param a\n" |
| 17392 | "/// A 128-bit integer vector containing the source operand.\n" |
| 17393 | "/// \\param imm\n" |
| 17394 | "/// An immediate value specifying the number of bytes to right-shift operand\n" |
| 17395 | "/// \\a a.\n" |
| 17396 | "/// \\returns A 128-bit integer vector containing the right-shifted value.\n" |
| 17397 | "#define _mm_srli_si128(a, imm) \\\n" |
| 17398 | " (__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))\n" |
| 17399 | "\n" |
| 17400 | "#define _mm_bsrli_si128(a, imm) \\\n" |
| 17401 | " (__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))\n" |
| 17402 | "\n" |
| 17403 | "/// Right-shifts each of 16-bit values in the 128-bit integer vector\n" |
| 17404 | "/// operand by the specified number of bits. High-order bits are cleared.\n" |
| 17405 | "///\n" |
| 17406 | "/// \\headerfile <x86intrin.h>\n" |
| 17407 | "///\n" |
| 17408 | "/// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction.\n" |
| 17409 | "///\n" |
| 17410 | "/// \\param __a\n" |
| 17411 | "/// A 128-bit integer vector containing the source operand.\n" |
| 17412 | "/// \\param __count\n" |
| 17413 | "/// An integer value specifying the number of bits to right-shift each value\n" |
| 17414 | "/// in operand \\a __a.\n" |
| 17415 | "/// \\returns A 128-bit integer vector containing the right-shifted values.\n" |
| 17416 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17417 | "_mm_srli_epi16(__m128i __a, int __count)\n" |
| 17418 | "{\n" |
| 17419 | " return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count);\n" |
| 17420 | "}\n" |
| 17421 | "\n" |
| 17422 | "/// Right-shifts each of 16-bit values in the 128-bit integer vector\n" |
| 17423 | "/// operand by the specified number of bits. High-order bits are cleared.\n" |
| 17424 | "///\n" |
| 17425 | "/// \\headerfile <x86intrin.h>\n" |
| 17426 | "///\n" |
| 17427 | "/// This intrinsic corresponds to the <c> VPSRLW / PSRLW </c> instruction.\n" |
| 17428 | "///\n" |
| 17429 | "/// \\param __a\n" |
| 17430 | "/// A 128-bit integer vector containing the source operand.\n" |
| 17431 | "/// \\param __count\n" |
| 17432 | "/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n" |
| 17433 | "/// to right-shift each value in operand \\a __a.\n" |
| 17434 | "/// \\returns A 128-bit integer vector containing the right-shifted values.\n" |
| 17435 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17436 | "_mm_srl_epi16(__m128i __a, __m128i __count)\n" |
| 17437 | "{\n" |
| 17438 | " return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count);\n" |
| 17439 | "}\n" |
| 17440 | "\n" |
| 17441 | "/// Right-shifts each of 32-bit values in the 128-bit integer vector\n" |
| 17442 | "/// operand by the specified number of bits. High-order bits are cleared.\n" |
| 17443 | "///\n" |
| 17444 | "/// \\headerfile <x86intrin.h>\n" |
| 17445 | "///\n" |
| 17446 | "/// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction.\n" |
| 17447 | "///\n" |
| 17448 | "/// \\param __a\n" |
| 17449 | "/// A 128-bit integer vector containing the source operand.\n" |
| 17450 | "/// \\param __count\n" |
| 17451 | "/// An integer value specifying the number of bits to right-shift each value\n" |
| 17452 | "/// in operand \\a __a.\n" |
| 17453 | "/// \\returns A 128-bit integer vector containing the right-shifted values.\n" |
| 17454 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17455 | "_mm_srli_epi32(__m128i __a, int __count)\n" |
| 17456 | "{\n" |
| 17457 | " return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count);\n" |
| 17458 | "}\n" |
| 17459 | "\n" |
| 17460 | "/// Right-shifts each of 32-bit values in the 128-bit integer vector\n" |
| 17461 | "/// operand by the specified number of bits. High-order bits are cleared.\n" |
| 17462 | "///\n" |
| 17463 | "/// \\headerfile <x86intrin.h>\n" |
| 17464 | "///\n" |
| 17465 | "/// This intrinsic corresponds to the <c> VPSRLD / PSRLD </c> instruction.\n" |
| 17466 | "///\n" |
| 17467 | "/// \\param __a\n" |
| 17468 | "/// A 128-bit integer vector containing the source operand.\n" |
| 17469 | "/// \\param __count\n" |
| 17470 | "/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n" |
| 17471 | "/// to right-shift each value in operand \\a __a.\n" |
| 17472 | "/// \\returns A 128-bit integer vector containing the right-shifted values.\n" |
| 17473 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17474 | "_mm_srl_epi32(__m128i __a, __m128i __count)\n" |
| 17475 | "{\n" |
| 17476 | " return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count);\n" |
| 17477 | "}\n" |
| 17478 | "\n" |
| 17479 | "/// Right-shifts each of 64-bit values in the 128-bit integer vector\n" |
| 17480 | "/// operand by the specified number of bits. High-order bits are cleared.\n" |
| 17481 | "///\n" |
| 17482 | "/// \\headerfile <x86intrin.h>\n" |
| 17483 | "///\n" |
| 17484 | "/// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction.\n" |
| 17485 | "///\n" |
| 17486 | "/// \\param __a\n" |
| 17487 | "/// A 128-bit integer vector containing the source operand.\n" |
| 17488 | "/// \\param __count\n" |
| 17489 | "/// An integer value specifying the number of bits to right-shift each value\n" |
| 17490 | "/// in operand \\a __a.\n" |
| 17491 | "/// \\returns A 128-bit integer vector containing the right-shifted values.\n" |
| 17492 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17493 | "_mm_srli_epi64(__m128i __a, int __count)\n" |
| 17494 | "{\n" |
| 17495 | " return __builtin_ia32_psrlqi128((__v2di)__a, __count);\n" |
| 17496 | "}\n" |
| 17497 | "\n" |
| 17498 | "/// Right-shifts each of 64-bit values in the 128-bit integer vector\n" |
| 17499 | "/// operand by the specified number of bits. High-order bits are cleared.\n" |
| 17500 | "///\n" |
| 17501 | "/// \\headerfile <x86intrin.h>\n" |
| 17502 | "///\n" |
| 17503 | "/// This intrinsic corresponds to the <c> VPSRLQ / PSRLQ </c> instruction.\n" |
| 17504 | "///\n" |
| 17505 | "/// \\param __a\n" |
| 17506 | "/// A 128-bit integer vector containing the source operand.\n" |
| 17507 | "/// \\param __count\n" |
| 17508 | "/// A 128-bit integer vector in which bits [63:0] specify the number of bits\n" |
| 17509 | "/// to right-shift each value in operand \\a __a.\n" |
| 17510 | "/// \\returns A 128-bit integer vector containing the right-shifted values.\n" |
| 17511 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17512 | "_mm_srl_epi64(__m128i __a, __m128i __count)\n" |
| 17513 | "{\n" |
| 17514 | " return __builtin_ia32_psrlq128((__v2di)__a, (__v2di)__count);\n" |
| 17515 | "}\n" |
| 17516 | "\n" |
| 17517 | "/// Compares each of the corresponding 8-bit values of the 128-bit\n" |
| 17518 | "/// integer vectors for equality. Each comparison yields 0x0 for false, 0xFF\n" |
| 17519 | "/// for true.\n" |
| 17520 | "///\n" |
| 17521 | "/// \\headerfile <x86intrin.h>\n" |
| 17522 | "///\n" |
| 17523 | "/// This intrinsic corresponds to the <c> VPCMPEQB / PCMPEQB </c> instruction.\n" |
| 17524 | "///\n" |
| 17525 | "/// \\param __a\n" |
| 17526 | "/// A 128-bit integer vector.\n" |
| 17527 | "/// \\param __b\n" |
| 17528 | "/// A 128-bit integer vector.\n" |
| 17529 | "/// \\returns A 128-bit integer vector containing the comparison results.\n" |
| 17530 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17531 | "_mm_cmpeq_epi8(__m128i __a, __m128i __b)\n" |
| 17532 | "{\n" |
| 17533 | " return (__m128i)((__v16qi)__a == (__v16qi)__b);\n" |
| 17534 | "}\n" |
| 17535 | "\n" |
| 17536 | "/// Compares each of the corresponding 16-bit values of the 128-bit\n" |
| 17537 | "/// integer vectors for equality. Each comparison yields 0x0 for false,\n" |
| 17538 | "/// 0xFFFF for true.\n" |
| 17539 | "///\n" |
| 17540 | "/// \\headerfile <x86intrin.h>\n" |
| 17541 | "///\n" |
| 17542 | "/// This intrinsic corresponds to the <c> VPCMPEQW / PCMPEQW </c> instruction.\n" |
| 17543 | "///\n" |
| 17544 | "/// \\param __a\n" |
| 17545 | "/// A 128-bit integer vector.\n" |
| 17546 | "/// \\param __b\n" |
| 17547 | "/// A 128-bit integer vector.\n" |
| 17548 | "/// \\returns A 128-bit integer vector containing the comparison results.\n" |
| 17549 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17550 | "_mm_cmpeq_epi16(__m128i __a, __m128i __b)\n" |
| 17551 | "{\n" |
| 17552 | " return (__m128i)((__v8hi)__a == (__v8hi)__b);\n" |
| 17553 | "}\n" |
| 17554 | "\n" |
| 17555 | "/// Compares each of the corresponding 32-bit values of the 128-bit\n" |
| 17556 | "/// integer vectors for equality. Each comparison yields 0x0 for false,\n" |
| 17557 | "/// 0xFFFFFFFF for true.\n" |
| 17558 | "///\n" |
| 17559 | "/// \\headerfile <x86intrin.h>\n" |
| 17560 | "///\n" |
| 17561 | "/// This intrinsic corresponds to the <c> VPCMPEQD / PCMPEQD </c> instruction.\n" |
| 17562 | "///\n" |
| 17563 | "/// \\param __a\n" |
| 17564 | "/// A 128-bit integer vector.\n" |
| 17565 | "/// \\param __b\n" |
| 17566 | "/// A 128-bit integer vector.\n" |
| 17567 | "/// \\returns A 128-bit integer vector containing the comparison results.\n" |
| 17568 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17569 | "_mm_cmpeq_epi32(__m128i __a, __m128i __b)\n" |
| 17570 | "{\n" |
| 17571 | " return (__m128i)((__v4si)__a == (__v4si)__b);\n" |
| 17572 | "}\n" |
| 17573 | "\n" |
| 17574 | "/// Compares each of the corresponding signed 8-bit values of the 128-bit\n" |
| 17575 | "/// integer vectors to determine if the values in the first operand are\n" |
| 17576 | "/// greater than those in the second operand. Each comparison yields 0x0 for\n" |
| 17577 | "/// false, 0xFF for true.\n" |
| 17578 | "///\n" |
| 17579 | "/// \\headerfile <x86intrin.h>\n" |
| 17580 | "///\n" |
| 17581 | "/// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction.\n" |
| 17582 | "///\n" |
| 17583 | "/// \\param __a\n" |
| 17584 | "/// A 128-bit integer vector.\n" |
| 17585 | "/// \\param __b\n" |
| 17586 | "/// A 128-bit integer vector.\n" |
| 17587 | "/// \\returns A 128-bit integer vector containing the comparison results.\n" |
| 17588 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17589 | "_mm_cmpgt_epi8(__m128i __a, __m128i __b)\n" |
| 17590 | "{\n" |
| 17591 | " /* This function always performs a signed comparison, but __v16qi is a char\n" |
| 17592 | " which may be signed or unsigned, so use __v16qs. */\n" |
| 17593 | " return (__m128i)((__v16qs)__a > (__v16qs)__b);\n" |
| 17594 | "}\n" |
| 17595 | "\n" |
| 17596 | "/// Compares each of the corresponding signed 16-bit values of the\n" |
| 17597 | "/// 128-bit integer vectors to determine if the values in the first operand\n" |
| 17598 | "/// are greater than those in the second operand.\n" |
| 17599 | "///\n" |
| 17600 | "/// Each comparison yields 0x0 for false, 0xFFFF for true.\n" |
| 17601 | "///\n" |
| 17602 | "/// \\headerfile <x86intrin.h>\n" |
| 17603 | "///\n" |
| 17604 | "/// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction.\n" |
| 17605 | "///\n" |
| 17606 | "/// \\param __a\n" |
| 17607 | "/// A 128-bit integer vector.\n" |
| 17608 | "/// \\param __b\n" |
| 17609 | "/// A 128-bit integer vector.\n" |
| 17610 | "/// \\returns A 128-bit integer vector containing the comparison results.\n" |
| 17611 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17612 | "_mm_cmpgt_epi16(__m128i __a, __m128i __b)\n" |
| 17613 | "{\n" |
| 17614 | " return (__m128i)((__v8hi)__a > (__v8hi)__b);\n" |
| 17615 | "}\n" |
| 17616 | "\n" |
| 17617 | "/// Compares each of the corresponding signed 32-bit values of the\n" |
| 17618 | "/// 128-bit integer vectors to determine if the values in the first operand\n" |
| 17619 | "/// are greater than those in the second operand.\n" |
| 17620 | "///\n" |
| 17621 | "/// Each comparison yields 0x0 for false, 0xFFFFFFFF for true.\n" |
| 17622 | "///\n" |
| 17623 | "/// \\headerfile <x86intrin.h>\n" |
| 17624 | "///\n" |
| 17625 | "/// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction.\n" |
| 17626 | "///\n" |
| 17627 | "/// \\param __a\n" |
| 17628 | "/// A 128-bit integer vector.\n" |
| 17629 | "/// \\param __b\n" |
| 17630 | "/// A 128-bit integer vector.\n" |
| 17631 | "/// \\returns A 128-bit integer vector containing the comparison results.\n" |
| 17632 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17633 | "_mm_cmpgt_epi32(__m128i __a, __m128i __b)\n" |
| 17634 | "{\n" |
| 17635 | " return (__m128i)((__v4si)__a > (__v4si)__b);\n" |
| 17636 | "}\n" |
| 17637 | "\n" |
| 17638 | "/// Compares each of the corresponding signed 8-bit values of the 128-bit\n" |
| 17639 | "/// integer vectors to determine if the values in the first operand are less\n" |
| 17640 | "/// than those in the second operand.\n" |
| 17641 | "///\n" |
| 17642 | "/// Each comparison yields 0x0 for false, 0xFF for true.\n" |
| 17643 | "///\n" |
| 17644 | "/// \\headerfile <x86intrin.h>\n" |
| 17645 | "///\n" |
| 17646 | "/// This intrinsic corresponds to the <c> VPCMPGTB / PCMPGTB </c> instruction.\n" |
| 17647 | "///\n" |
| 17648 | "/// \\param __a\n" |
| 17649 | "/// A 128-bit integer vector.\n" |
| 17650 | "/// \\param __b\n" |
| 17651 | "/// A 128-bit integer vector.\n" |
| 17652 | "/// \\returns A 128-bit integer vector containing the comparison results.\n" |
| 17653 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17654 | "_mm_cmplt_epi8(__m128i __a, __m128i __b)\n" |
| 17655 | "{\n" |
| 17656 | " return _mm_cmpgt_epi8(__b, __a);\n" |
| 17657 | "}\n" |
| 17658 | "\n" |
| 17659 | "/// Compares each of the corresponding signed 16-bit values of the\n" |
| 17660 | "/// 128-bit integer vectors to determine if the values in the first operand\n" |
| 17661 | "/// are less than those in the second operand.\n" |
| 17662 | "///\n" |
| 17663 | "/// Each comparison yields 0x0 for false, 0xFFFF for true.\n" |
| 17664 | "///\n" |
| 17665 | "/// \\headerfile <x86intrin.h>\n" |
| 17666 | "///\n" |
| 17667 | "/// This intrinsic corresponds to the <c> VPCMPGTW / PCMPGTW </c> instruction.\n" |
| 17668 | "///\n" |
| 17669 | "/// \\param __a\n" |
| 17670 | "/// A 128-bit integer vector.\n" |
| 17671 | "/// \\param __b\n" |
| 17672 | "/// A 128-bit integer vector.\n" |
| 17673 | "/// \\returns A 128-bit integer vector containing the comparison results.\n" |
| 17674 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17675 | "_mm_cmplt_epi16(__m128i __a, __m128i __b)\n" |
| 17676 | "{\n" |
| 17677 | " return _mm_cmpgt_epi16(__b, __a);\n" |
| 17678 | "}\n" |
| 17679 | "\n" |
| 17680 | "/// Compares each of the corresponding signed 32-bit values of the\n" |
| 17681 | "/// 128-bit integer vectors to determine if the values in the first operand\n" |
| 17682 | "/// are less than those in the second operand.\n" |
| 17683 | "///\n" |
| 17684 | "/// Each comparison yields 0x0 for false, 0xFFFFFFFF for true.\n" |
| 17685 | "///\n" |
| 17686 | "/// \\headerfile <x86intrin.h>\n" |
| 17687 | "///\n" |
| 17688 | "/// This intrinsic corresponds to the <c> VPCMPGTD / PCMPGTD </c> instruction.\n" |
| 17689 | "///\n" |
| 17690 | "/// \\param __a\n" |
| 17691 | "/// A 128-bit integer vector.\n" |
| 17692 | "/// \\param __b\n" |
| 17693 | "/// A 128-bit integer vector.\n" |
| 17694 | "/// \\returns A 128-bit integer vector containing the comparison results.\n" |
| 17695 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17696 | "_mm_cmplt_epi32(__m128i __a, __m128i __b)\n" |
| 17697 | "{\n" |
| 17698 | " return _mm_cmpgt_epi32(__b, __a);\n" |
| 17699 | "}\n" |
| 17700 | "\n" |
| 17701 | "#ifdef __x86_64__\n" |
| 17702 | "/// Converts a 64-bit signed integer value from the second operand into a\n" |
| 17703 | "/// double-precision value and returns it in the lower element of a [2 x\n" |
| 17704 | "/// double] vector; the upper element of the returned vector is copied from\n" |
| 17705 | "/// the upper element of the first operand.\n" |
| 17706 | "///\n" |
| 17707 | "/// \\headerfile <x86intrin.h>\n" |
| 17708 | "///\n" |
| 17709 | "/// This intrinsic corresponds to the <c> VCVTSI2SD / CVTSI2SD </c> instruction.\n" |
| 17710 | "///\n" |
| 17711 | "/// \\param __a\n" |
| 17712 | "/// A 128-bit vector of [2 x double]. The upper 64 bits of this operand are\n" |
| 17713 | "/// copied to the upper 64 bits of the destination.\n" |
| 17714 | "/// \\param __b\n" |
| 17715 | "/// A 64-bit signed integer operand containing the value to be converted.\n" |
| 17716 | "/// \\returns A 128-bit vector of [2 x double] whose lower 64 bits contain the\n" |
| 17717 | "/// converted value of the second operand. The upper 64 bits are copied from\n" |
| 17718 | "/// the upper 64 bits of the first operand.\n" |
| 17719 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 17720 | "_mm_cvtsi64_sd(__m128d __a, long long __b)\n" |
| 17721 | "{\n" |
| 17722 | " __a[0] = __b;\n" |
| 17723 | " return __a;\n" |
| 17724 | "}\n" |
| 17725 | "\n" |
| 17726 | "/// Converts the first (lower) element of a vector of [2 x double] into a\n" |
| 17727 | "/// 64-bit signed integer value, according to the current rounding mode.\n" |
| 17728 | "///\n" |
| 17729 | "/// \\headerfile <x86intrin.h>\n" |
| 17730 | "///\n" |
| 17731 | "/// This intrinsic corresponds to the <c> VCVTSD2SI / CVTSD2SI </c> instruction.\n" |
| 17732 | "///\n" |
| 17733 | "/// \\param __a\n" |
| 17734 | "/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the\n" |
| 17735 | "/// conversion.\n" |
| 17736 | "/// \\returns A 64-bit signed integer containing the converted value.\n" |
| 17737 | "static __inline__ long long __DEFAULT_FN_ATTRS\n" |
| 17738 | "_mm_cvtsd_si64(__m128d __a)\n" |
| 17739 | "{\n" |
| 17740 | " return __builtin_ia32_cvtsd2si64((__v2df)__a);\n" |
| 17741 | "}\n" |
| 17742 | "\n" |
| 17743 | "/// Converts the first (lower) element of a vector of [2 x double] into a\n" |
| 17744 | "/// 64-bit signed integer value, truncating the result when it is inexact.\n" |
| 17745 | "///\n" |
| 17746 | "/// \\headerfile <x86intrin.h>\n" |
| 17747 | "///\n" |
| 17748 | "/// This intrinsic corresponds to the <c> VCVTTSD2SI / CVTTSD2SI </c>\n" |
| 17749 | "/// instruction.\n" |
| 17750 | "///\n" |
| 17751 | "/// \\param __a\n" |
| 17752 | "/// A 128-bit vector of [2 x double]. The lower 64 bits are used in the\n" |
| 17753 | "/// conversion.\n" |
| 17754 | "/// \\returns A 64-bit signed integer containing the converted value.\n" |
| 17755 | "static __inline__ long long __DEFAULT_FN_ATTRS\n" |
| 17756 | "_mm_cvttsd_si64(__m128d __a)\n" |
| 17757 | "{\n" |
| 17758 | " return __builtin_ia32_cvttsd2si64((__v2df)__a);\n" |
| 17759 | "}\n" |
| 17760 | "#endif\n" |
| 17761 | "\n" |
| 17762 | "/// Converts a vector of [4 x i32] into a vector of [4 x float].\n" |
| 17763 | "///\n" |
| 17764 | "/// \\headerfile <x86intrin.h>\n" |
| 17765 | "///\n" |
| 17766 | "/// This intrinsic corresponds to the <c> VCVTDQ2PS / CVTDQ2PS </c> instruction.\n" |
| 17767 | "///\n" |
| 17768 | "/// \\param __a\n" |
| 17769 | "/// A 128-bit integer vector.\n" |
| 17770 | "/// \\returns A 128-bit vector of [4 x float] containing the converted values.\n" |
| 17771 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 17772 | "_mm_cvtepi32_ps(__m128i __a)\n" |
| 17773 | "{\n" |
| 17774 | " return (__m128)__builtin_convertvector((__v4si)__a, __v4sf);\n" |
| 17775 | "}\n" |
| 17776 | "\n" |
| 17777 | "/// Converts a vector of [4 x float] into a vector of [4 x i32].\n" |
| 17778 | "///\n" |
| 17779 | "/// \\headerfile <x86intrin.h>\n" |
| 17780 | "///\n" |
| 17781 | "/// This intrinsic corresponds to the <c> VCVTPS2DQ / CVTPS2DQ </c> instruction.\n" |
| 17782 | "///\n" |
| 17783 | "/// \\param __a\n" |
| 17784 | "/// A 128-bit vector of [4 x float].\n" |
| 17785 | "/// \\returns A 128-bit integer vector of [4 x i32] containing the converted\n" |
| 17786 | "/// values.\n" |
| 17787 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17788 | "_mm_cvtps_epi32(__m128 __a)\n" |
| 17789 | "{\n" |
| 17790 | " return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)__a);\n" |
| 17791 | "}\n" |
| 17792 | "\n" |
| 17793 | "/// Converts a vector of [4 x float] into a vector of [4 x i32],\n" |
| 17794 | "/// truncating the result when it is inexact.\n" |
| 17795 | "///\n" |
| 17796 | "/// \\headerfile <x86intrin.h>\n" |
| 17797 | "///\n" |
| 17798 | "/// This intrinsic corresponds to the <c> VCVTTPS2DQ / CVTTPS2DQ </c>\n" |
| 17799 | "/// instruction.\n" |
| 17800 | "///\n" |
| 17801 | "/// \\param __a\n" |
| 17802 | "/// A 128-bit vector of [4 x float].\n" |
| 17803 | "/// \\returns A 128-bit vector of [4 x i32] containing the converted values.\n" |
| 17804 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17805 | "_mm_cvttps_epi32(__m128 __a)\n" |
| 17806 | "{\n" |
| 17807 | " return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a);\n" |
| 17808 | "}\n" |
| 17809 | "\n" |
| 17810 | "/// Returns a vector of [4 x i32] where the lowest element is the input\n" |
| 17811 | "/// operand and the remaining elements are zero.\n" |
| 17812 | "///\n" |
| 17813 | "/// \\headerfile <x86intrin.h>\n" |
| 17814 | "///\n" |
| 17815 | "/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.\n" |
| 17816 | "///\n" |
| 17817 | "/// \\param __a\n" |
| 17818 | "/// A 32-bit signed integer operand.\n" |
| 17819 | "/// \\returns A 128-bit vector of [4 x i32].\n" |
| 17820 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17821 | "_mm_cvtsi32_si128(int __a)\n" |
| 17822 | "{\n" |
| 17823 | " return __extension__ (__m128i)(__v4si){ __a, 0, 0, 0 };\n" |
| 17824 | "}\n" |
| 17825 | "\n" |
| 17826 | "#ifdef __x86_64__\n" |
| 17827 | "/// Returns a vector of [2 x i64] where the lower element is the input\n" |
| 17828 | "/// operand and the upper element is zero.\n" |
| 17829 | "///\n" |
| 17830 | "/// \\headerfile <x86intrin.h>\n" |
| 17831 | "///\n" |
| 17832 | "/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n" |
| 17833 | "///\n" |
| 17834 | "/// \\param __a\n" |
| 17835 | "/// A 64-bit signed integer operand containing the value to be converted.\n" |
| 17836 | "/// \\returns A 128-bit vector of [2 x i64] containing the converted value.\n" |
| 17837 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17838 | "_mm_cvtsi64_si128(long long __a)\n" |
| 17839 | "{\n" |
| 17840 | " return __extension__ (__m128i)(__v2di){ __a, 0 };\n" |
| 17841 | "}\n" |
| 17842 | "#endif\n" |
| 17843 | "\n" |
| 17844 | "/// Moves the least significant 32 bits of a vector of [4 x i32] to a\n" |
| 17845 | "/// 32-bit signed integer value.\n" |
| 17846 | "///\n" |
| 17847 | "/// \\headerfile <x86intrin.h>\n" |
| 17848 | "///\n" |
| 17849 | "/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.\n" |
| 17850 | "///\n" |
| 17851 | "/// \\param __a\n" |
| 17852 | "/// A vector of [4 x i32]. The least significant 32 bits are moved to the\n" |
| 17853 | "/// destination.\n" |
| 17854 | "/// \\returns A 32-bit signed integer containing the moved value.\n" |
| 17855 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 17856 | "_mm_cvtsi128_si32(__m128i __a)\n" |
| 17857 | "{\n" |
| 17858 | " __v4si __b = (__v4si)__a;\n" |
| 17859 | " return __b[0];\n" |
| 17860 | "}\n" |
| 17861 | "\n" |
| 17862 | "#ifdef __x86_64__\n" |
| 17863 | "/// Moves the least significant 64 bits of a vector of [2 x i64] to a\n" |
| 17864 | "/// 64-bit signed integer value.\n" |
| 17865 | "///\n" |
| 17866 | "/// \\headerfile <x86intrin.h>\n" |
| 17867 | "///\n" |
| 17868 | "/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n" |
| 17869 | "///\n" |
| 17870 | "/// \\param __a\n" |
| 17871 | "/// A vector of [2 x i64]. The least significant 64 bits are moved to the\n" |
| 17872 | "/// destination.\n" |
| 17873 | "/// \\returns A 64-bit signed integer containing the moved value.\n" |
| 17874 | "static __inline__ long long __DEFAULT_FN_ATTRS\n" |
| 17875 | "_mm_cvtsi128_si64(__m128i __a)\n" |
| 17876 | "{\n" |
| 17877 | " return __a[0];\n" |
| 17878 | "}\n" |
| 17879 | "#endif\n" |
| 17880 | "\n" |
| 17881 | "/// Moves packed integer values from an aligned 128-bit memory location\n" |
| 17882 | "/// to elements in a 128-bit integer vector.\n" |
| 17883 | "///\n" |
| 17884 | "/// \\headerfile <x86intrin.h>\n" |
| 17885 | "///\n" |
| 17886 | "/// This intrinsic corresponds to the <c> VMOVDQA / MOVDQA </c> instruction.\n" |
| 17887 | "///\n" |
| 17888 | "/// \\param __p\n" |
| 17889 | "/// An aligned pointer to a memory location containing integer values.\n" |
| 17890 | "/// \\returns A 128-bit integer vector containing the moved values.\n" |
| 17891 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17892 | "_mm_load_si128(__m128i const *__p)\n" |
| 17893 | "{\n" |
| 17894 | " return *__p;\n" |
| 17895 | "}\n" |
| 17896 | "\n" |
| 17897 | "/// Moves packed integer values from an unaligned 128-bit memory location\n" |
| 17898 | "/// to elements in a 128-bit integer vector.\n" |
| 17899 | "///\n" |
| 17900 | "/// \\headerfile <x86intrin.h>\n" |
| 17901 | "///\n" |
| 17902 | "/// This intrinsic corresponds to the <c> VMOVDQU / MOVDQU </c> instruction.\n" |
| 17903 | "///\n" |
| 17904 | "/// \\param __p\n" |
| 17905 | "/// A pointer to a memory location containing integer values.\n" |
| 17906 | "/// \\returns A 128-bit integer vector containing the moved values.\n" |
| 17907 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17908 | "_mm_loadu_si128(__m128i const *__p)\n" |
| 17909 | "{\n" |
| 17910 | " struct __loadu_si128 {\n" |
| 17911 | " __m128i __v;\n" |
| 17912 | " } __attribute__((__packed__, __may_alias__));\n" |
| 17913 | " return ((struct __loadu_si128*)__p)->__v;\n" |
| 17914 | "}\n" |
| 17915 | "\n" |
| 17916 | "/// Returns a vector of [2 x i64] where the lower element is taken from\n" |
| 17917 | "/// the lower element of the operand, and the upper element is zero.\n" |
| 17918 | "///\n" |
| 17919 | "/// \\headerfile <x86intrin.h>\n" |
| 17920 | "///\n" |
| 17921 | "/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n" |
| 17922 | "///\n" |
| 17923 | "/// \\param __p\n" |
| 17924 | "/// A 128-bit vector of [2 x i64]. Bits [63:0] are written to bits [63:0] of\n" |
| 17925 | "/// the destination.\n" |
| 17926 | "/// \\returns A 128-bit vector of [2 x i64]. The lower order bits contain the\n" |
| 17927 | "/// moved value. The higher order bits are cleared.\n" |
| 17928 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17929 | "_mm_loadl_epi64(__m128i const *__p)\n" |
| 17930 | "{\n" |
| 17931 | " struct __mm_loadl_epi64_struct {\n" |
| 17932 | " long long __u;\n" |
| 17933 | " } __attribute__((__packed__, __may_alias__));\n" |
| 17934 | " return __extension__ (__m128i) { ((struct __mm_loadl_epi64_struct*)__p)->__u, 0};\n" |
| 17935 | "}\n" |
| 17936 | "\n" |
| 17937 | "/// Generates a 128-bit vector of [4 x i32] with unspecified content.\n" |
| 17938 | "/// This could be used as an argument to another intrinsic function where the\n" |
| 17939 | "/// argument is required but the value is not actually used.\n" |
| 17940 | "///\n" |
| 17941 | "/// \\headerfile <x86intrin.h>\n" |
| 17942 | "///\n" |
| 17943 | "/// This intrinsic has no corresponding instruction.\n" |
| 17944 | "///\n" |
| 17945 | "/// \\returns A 128-bit vector of [4 x i32] with unspecified content.\n" |
| 17946 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17947 | "_mm_undefined_si128(void)\n" |
| 17948 | "{\n" |
| 17949 | " return (__m128i)__builtin_ia32_undef128();\n" |
| 17950 | "}\n" |
| 17951 | "\n" |
| 17952 | "/// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with\n" |
| 17953 | "/// the specified 64-bit integer values.\n" |
| 17954 | "///\n" |
| 17955 | "/// \\headerfile <x86intrin.h>\n" |
| 17956 | "///\n" |
| 17957 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 17958 | "/// instruction.\n" |
| 17959 | "///\n" |
| 17960 | "/// \\param __q1\n" |
| 17961 | "/// A 64-bit integer value used to initialize the upper 64 bits of the\n" |
| 17962 | "/// destination vector of [2 x i64].\n" |
| 17963 | "/// \\param __q0\n" |
| 17964 | "/// A 64-bit integer value used to initialize the lower 64 bits of the\n" |
| 17965 | "/// destination vector of [2 x i64].\n" |
| 17966 | "/// \\returns An initialized 128-bit vector of [2 x i64] containing the values\n" |
| 17967 | "/// provided in the operands.\n" |
| 17968 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17969 | "_mm_set_epi64x(long long __q1, long long __q0)\n" |
| 17970 | "{\n" |
| 17971 | " return __extension__ (__m128i)(__v2di){ __q0, __q1 };\n" |
| 17972 | "}\n" |
| 17973 | "\n" |
| 17974 | "/// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with\n" |
| 17975 | "/// the specified 64-bit integer values.\n" |
| 17976 | "///\n" |
| 17977 | "/// \\headerfile <x86intrin.h>\n" |
| 17978 | "///\n" |
| 17979 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 17980 | "/// instruction.\n" |
| 17981 | "///\n" |
| 17982 | "/// \\param __q1\n" |
| 17983 | "/// A 64-bit integer value used to initialize the upper 64 bits of the\n" |
| 17984 | "/// destination vector of [2 x i64].\n" |
| 17985 | "/// \\param __q0\n" |
| 17986 | "/// A 64-bit integer value used to initialize the lower 64 bits of the\n" |
| 17987 | "/// destination vector of [2 x i64].\n" |
| 17988 | "/// \\returns An initialized 128-bit vector of [2 x i64] containing the values\n" |
| 17989 | "/// provided in the operands.\n" |
| 17990 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 17991 | "_mm_set_epi64(__m64 __q1, __m64 __q0)\n" |
| 17992 | "{\n" |
| 17993 | " return _mm_set_epi64x((long long)__q1, (long long)__q0);\n" |
| 17994 | "}\n" |
| 17995 | "\n" |
| 17996 | "/// Initializes the 32-bit values in a 128-bit vector of [4 x i32] with\n" |
| 17997 | "/// the specified 32-bit integer values.\n" |
| 17998 | "///\n" |
| 17999 | "/// \\headerfile <x86intrin.h>\n" |
| 18000 | "///\n" |
| 18001 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 18002 | "/// instruction.\n" |
| 18003 | "///\n" |
| 18004 | "/// \\param __i3\n" |
| 18005 | "/// A 32-bit integer value used to initialize bits [127:96] of the\n" |
| 18006 | "/// destination vector.\n" |
| 18007 | "/// \\param __i2\n" |
| 18008 | "/// A 32-bit integer value used to initialize bits [95:64] of the destination\n" |
| 18009 | "/// vector.\n" |
| 18010 | "/// \\param __i1\n" |
| 18011 | "/// A 32-bit integer value used to initialize bits [63:32] of the destination\n" |
| 18012 | "/// vector.\n" |
| 18013 | "/// \\param __i0\n" |
| 18014 | "/// A 32-bit integer value used to initialize bits [31:0] of the destination\n" |
| 18015 | "/// vector.\n" |
| 18016 | "/// \\returns An initialized 128-bit vector of [4 x i32] containing the values\n" |
| 18017 | "/// provided in the operands.\n" |
| 18018 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 18019 | "_mm_set_epi32(int __i3, int __i2, int __i1, int __i0)\n" |
| 18020 | "{\n" |
| 18021 | " return __extension__ (__m128i)(__v4si){ __i0, __i1, __i2, __i3};\n" |
| 18022 | "}\n" |
| 18023 | "\n" |
| 18024 | "/// Initializes the 16-bit values in a 128-bit vector of [8 x i16] with\n" |
| 18025 | "/// the specified 16-bit integer values.\n" |
| 18026 | "///\n" |
| 18027 | "/// \\headerfile <x86intrin.h>\n" |
| 18028 | "///\n" |
| 18029 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 18030 | "/// instruction.\n" |
| 18031 | "///\n" |
| 18032 | "/// \\param __w7\n" |
| 18033 | "/// A 16-bit integer value used to initialize bits [127:112] of the\n" |
| 18034 | "/// destination vector.\n" |
| 18035 | "/// \\param __w6\n" |
| 18036 | "/// A 16-bit integer value used to initialize bits [111:96] of the\n" |
| 18037 | "/// destination vector.\n" |
| 18038 | "/// \\param __w5\n" |
| 18039 | "/// A 16-bit integer value used to initialize bits [95:80] of the destination\n" |
| 18040 | "/// vector.\n" |
| 18041 | "/// \\param __w4\n" |
| 18042 | "/// A 16-bit integer value used to initialize bits [79:64] of the destination\n" |
| 18043 | "/// vector.\n" |
| 18044 | "/// \\param __w3\n" |
| 18045 | "/// A 16-bit integer value used to initialize bits [63:48] of the destination\n" |
| 18046 | "/// vector.\n" |
| 18047 | "/// \\param __w2\n" |
| 18048 | "/// A 16-bit integer value used to initialize bits [47:32] of the destination\n" |
| 18049 | "/// vector.\n" |
| 18050 | "/// \\param __w1\n" |
| 18051 | "/// A 16-bit integer value used to initialize bits [31:16] of the destination\n" |
| 18052 | "/// vector.\n" |
| 18053 | "/// \\param __w0\n" |
| 18054 | "/// A 16-bit integer value used to initialize bits [15:0] of the destination\n" |
| 18055 | "/// vector.\n" |
| 18056 | "/// \\returns An initialized 128-bit vector of [8 x i16] containing the values\n" |
| 18057 | "/// provided in the operands.\n" |
| 18058 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 18059 | "_mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short __w2, short __w1, short __w0)\n" |
| 18060 | "{\n" |
| 18061 | " return __extension__ (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 };\n" |
| 18062 | "}\n" |
| 18063 | "\n" |
| 18064 | "/// Initializes the 8-bit values in a 128-bit vector of [16 x i8] with\n" |
| 18065 | "/// the specified 8-bit integer values.\n" |
| 18066 | "///\n" |
| 18067 | "/// \\headerfile <x86intrin.h>\n" |
| 18068 | "///\n" |
| 18069 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 18070 | "/// instruction.\n" |
| 18071 | "///\n" |
| 18072 | "/// \\param __b15\n" |
| 18073 | "/// Initializes bits [127:120] of the destination vector.\n" |
| 18074 | "/// \\param __b14\n" |
| 18075 | "/// Initializes bits [119:112] of the destination vector.\n" |
| 18076 | "/// \\param __b13\n" |
| 18077 | "/// Initializes bits [111:104] of the destination vector.\n" |
| 18078 | "/// \\param __b12\n" |
| 18079 | "/// Initializes bits [103:96] of the destination vector.\n" |
| 18080 | "/// \\param __b11\n" |
| 18081 | "/// Initializes bits [95:88] of the destination vector.\n" |
| 18082 | "/// \\param __b10\n" |
| 18083 | "/// Initializes bits [87:80] of the destination vector.\n" |
| 18084 | "/// \\param __b9\n" |
| 18085 | "/// Initializes bits [79:72] of the destination vector.\n" |
| 18086 | "/// \\param __b8\n" |
| 18087 | "/// Initializes bits [71:64] of the destination vector.\n" |
| 18088 | "/// \\param __b7\n" |
| 18089 | "/// Initializes bits [63:56] of the destination vector.\n" |
| 18090 | "/// \\param __b6\n" |
| 18091 | "/// Initializes bits [55:48] of the destination vector.\n" |
| 18092 | "/// \\param __b5\n" |
| 18093 | "/// Initializes bits [47:40] of the destination vector.\n" |
| 18094 | "/// \\param __b4\n" |
| 18095 | "/// Initializes bits [39:32] of the destination vector.\n" |
| 18096 | "/// \\param __b3\n" |
| 18097 | "/// Initializes bits [31:24] of the destination vector.\n" |
| 18098 | "/// \\param __b2\n" |
| 18099 | "/// Initializes bits [23:16] of the destination vector.\n" |
| 18100 | "/// \\param __b1\n" |
| 18101 | "/// Initializes bits [15:8] of the destination vector.\n" |
| 18102 | "/// \\param __b0\n" |
| 18103 | "/// Initializes bits [7:0] of the destination vector.\n" |
| 18104 | "/// \\returns An initialized 128-bit vector of [16 x i8] containing the values\n" |
| 18105 | "/// provided in the operands.\n" |
| 18106 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 18107 | "_mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0)\n" |
| 18108 | "{\n" |
| 18109 | " return __extension__ (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 };\n" |
| 18110 | "}\n" |
| 18111 | "\n" |
| 18112 | "/// Initializes both values in a 128-bit integer vector with the\n" |
| 18113 | "/// specified 64-bit integer value.\n" |
| 18114 | "///\n" |
| 18115 | "/// \\headerfile <x86intrin.h>\n" |
| 18116 | "///\n" |
| 18117 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 18118 | "/// instruction.\n" |
| 18119 | "///\n" |
| 18120 | "/// \\param __q\n" |
| 18121 | "/// Integer value used to initialize the elements of the destination integer\n" |
| 18122 | "/// vector.\n" |
| 18123 | "/// \\returns An initialized 128-bit integer vector of [2 x i64] with both\n" |
| 18124 | "/// elements containing the value provided in the operand.\n" |
| 18125 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 18126 | "_mm_set1_epi64x(long long __q)\n" |
| 18127 | "{\n" |
| 18128 | " return _mm_set_epi64x(__q, __q);\n" |
| 18129 | "}\n" |
| 18130 | "\n" |
| 18131 | "/// Initializes both values in a 128-bit vector of [2 x i64] with the\n" |
| 18132 | "/// specified 64-bit value.\n" |
| 18133 | "///\n" |
| 18134 | "/// \\headerfile <x86intrin.h>\n" |
| 18135 | "///\n" |
| 18136 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 18137 | "/// instruction.\n" |
| 18138 | "///\n" |
| 18139 | "/// \\param __q\n" |
| 18140 | "/// A 64-bit value used to initialize the elements of the destination integer\n" |
| 18141 | "/// vector.\n" |
| 18142 | "/// \\returns An initialized 128-bit vector of [2 x i64] with all elements\n" |
| 18143 | "/// containing the value provided in the operand.\n" |
| 18144 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 18145 | "_mm_set1_epi64(__m64 __q)\n" |
| 18146 | "{\n" |
| 18147 | " return _mm_set_epi64(__q, __q);\n" |
| 18148 | "}\n" |
| 18149 | "\n" |
| 18150 | "/// Initializes all values in a 128-bit vector of [4 x i32] with the\n" |
| 18151 | "/// specified 32-bit value.\n" |
| 18152 | "///\n" |
| 18153 | "/// \\headerfile <x86intrin.h>\n" |
| 18154 | "///\n" |
| 18155 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 18156 | "/// instruction.\n" |
| 18157 | "///\n" |
| 18158 | "/// \\param __i\n" |
| 18159 | "/// A 32-bit value used to initialize the elements of the destination integer\n" |
| 18160 | "/// vector.\n" |
| 18161 | "/// \\returns An initialized 128-bit vector of [4 x i32] with all elements\n" |
| 18162 | "/// containing the value provided in the operand.\n" |
| 18163 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 18164 | "_mm_set1_epi32(int __i)\n" |
| 18165 | "{\n" |
| 18166 | " return _mm_set_epi32(__i, __i, __i, __i);\n" |
| 18167 | "}\n" |
| 18168 | "\n" |
| 18169 | "/// Initializes all values in a 128-bit vector of [8 x i16] with the\n" |
| 18170 | "/// specified 16-bit value.\n" |
| 18171 | "///\n" |
| 18172 | "/// \\headerfile <x86intrin.h>\n" |
| 18173 | "///\n" |
| 18174 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 18175 | "/// instruction.\n" |
| 18176 | "///\n" |
| 18177 | "/// \\param __w\n" |
| 18178 | "/// A 16-bit value used to initialize the elements of the destination integer\n" |
| 18179 | "/// vector.\n" |
| 18180 | "/// \\returns An initialized 128-bit vector of [8 x i16] with all elements\n" |
| 18181 | "/// containing the value provided in the operand.\n" |
| 18182 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 18183 | "_mm_set1_epi16(short __w)\n" |
| 18184 | "{\n" |
| 18185 | " return _mm_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w);\n" |
| 18186 | "}\n" |
| 18187 | "\n" |
| 18188 | "/// Initializes all values in a 128-bit vector of [16 x i8] with the\n" |
| 18189 | "/// specified 8-bit value.\n" |
| 18190 | "///\n" |
| 18191 | "/// \\headerfile <x86intrin.h>\n" |
| 18192 | "///\n" |
| 18193 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 18194 | "/// instruction.\n" |
| 18195 | "///\n" |
| 18196 | "/// \\param __b\n" |
| 18197 | "/// An 8-bit value used to initialize the elements of the destination integer\n" |
| 18198 | "/// vector.\n" |
| 18199 | "/// \\returns An initialized 128-bit vector of [16 x i8] with all elements\n" |
| 18200 | "/// containing the value provided in the operand.\n" |
| 18201 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 18202 | "_mm_set1_epi8(char __b)\n" |
| 18203 | "{\n" |
| 18204 | " return _mm_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b);\n" |
| 18205 | "}\n" |
| 18206 | "\n" |
| 18207 | "/// Constructs a 128-bit integer vector, initialized in reverse order\n" |
| 18208 | "/// with the specified 64-bit integral values.\n" |
| 18209 | "///\n" |
| 18210 | "/// \\headerfile <x86intrin.h>\n" |
| 18211 | "///\n" |
| 18212 | "/// This intrinsic does not correspond to a specific instruction.\n" |
| 18213 | "///\n" |
| 18214 | "/// \\param __q0\n" |
| 18215 | "/// A 64-bit integral value used to initialize the lower 64 bits of the\n" |
| 18216 | "/// result.\n" |
| 18217 | "/// \\param __q1\n" |
| 18218 | "/// A 64-bit integral value used to initialize the upper 64 bits of the\n" |
| 18219 | "/// result.\n" |
| 18220 | "/// \\returns An initialized 128-bit integer vector.\n" |
| 18221 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 18222 | "_mm_setr_epi64(__m64 __q0, __m64 __q1)\n" |
| 18223 | "{\n" |
| 18224 | " return _mm_set_epi64(__q1, __q0);\n" |
| 18225 | "}\n" |
| 18226 | "\n" |
| 18227 | "/// Constructs a 128-bit integer vector, initialized in reverse order\n" |
| 18228 | "/// with the specified 32-bit integral values.\n" |
| 18229 | "///\n" |
| 18230 | "/// \\headerfile <x86intrin.h>\n" |
| 18231 | "///\n" |
| 18232 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 18233 | "/// instruction.\n" |
| 18234 | "///\n" |
| 18235 | "/// \\param __i0\n" |
| 18236 | "/// A 32-bit integral value used to initialize bits [31:0] of the result.\n" |
| 18237 | "/// \\param __i1\n" |
| 18238 | "/// A 32-bit integral value used to initialize bits [63:32] of the result.\n" |
| 18239 | "/// \\param __i2\n" |
| 18240 | "/// A 32-bit integral value used to initialize bits [95:64] of the result.\n" |
| 18241 | "/// \\param __i3\n" |
| 18242 | "/// A 32-bit integral value used to initialize bits [127:96] of the result.\n" |
| 18243 | "/// \\returns An initialized 128-bit integer vector.\n" |
| 18244 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 18245 | "_mm_setr_epi32(int __i0, int __i1, int __i2, int __i3)\n" |
| 18246 | "{\n" |
| 18247 | " return _mm_set_epi32(__i3, __i2, __i1, __i0);\n" |
| 18248 | "}\n" |
| 18249 | "\n" |
| 18250 | "/// Constructs a 128-bit integer vector, initialized in reverse order\n" |
| 18251 | "/// with the specified 16-bit integral values.\n" |
| 18252 | "///\n" |
| 18253 | "/// \\headerfile <x86intrin.h>\n" |
| 18254 | "///\n" |
| 18255 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 18256 | "/// instruction.\n" |
| 18257 | "///\n" |
| 18258 | "/// \\param __w0\n" |
| 18259 | "/// A 16-bit integral value used to initialize bits [15:0] of the result.\n" |
| 18260 | "/// \\param __w1\n" |
| 18261 | "/// A 16-bit integral value used to initialize bits [31:16] of the result.\n" |
| 18262 | "/// \\param __w2\n" |
| 18263 | "/// A 16-bit integral value used to initialize bits [47:32] of the result.\n" |
| 18264 | "/// \\param __w3\n" |
| 18265 | "/// A 16-bit integral value used to initialize bits [63:48] of the result.\n" |
| 18266 | "/// \\param __w4\n" |
| 18267 | "/// A 16-bit integral value used to initialize bits [79:64] of the result.\n" |
| 18268 | "/// \\param __w5\n" |
| 18269 | "/// A 16-bit integral value used to initialize bits [95:80] of the result.\n" |
| 18270 | "/// \\param __w6\n" |
| 18271 | "/// A 16-bit integral value used to initialize bits [111:96] of the result.\n" |
| 18272 | "/// \\param __w7\n" |
| 18273 | "/// A 16-bit integral value used to initialize bits [127:112] of the result.\n" |
| 18274 | "/// \\returns An initialized 128-bit integer vector.\n" |
| 18275 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 18276 | "_mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, short __w5, short __w6, short __w7)\n" |
| 18277 | "{\n" |
| 18278 | " return _mm_set_epi16(__w7, __w6, __w5, __w4, __w3, __w2, __w1, __w0);\n" |
| 18279 | "}\n" |
| 18280 | "\n" |
| 18281 | "/// Constructs a 128-bit integer vector, initialized in reverse order\n" |
| 18282 | "/// with the specified 8-bit integral values.\n" |
| 18283 | "///\n" |
| 18284 | "/// \\headerfile <x86intrin.h>\n" |
| 18285 | "///\n" |
| 18286 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 18287 | "/// instruction.\n" |
| 18288 | "///\n" |
| 18289 | "/// \\param __b0\n" |
| 18290 | "/// An 8-bit integral value used to initialize bits [7:0] of the result.\n" |
| 18291 | "/// \\param __b1\n" |
| 18292 | "/// An 8-bit integral value used to initialize bits [15:8] of the result.\n" |
| 18293 | "/// \\param __b2\n" |
| 18294 | "/// An 8-bit integral value used to initialize bits [23:16] of the result.\n" |
| 18295 | "/// \\param __b3\n" |
| 18296 | "/// An 8-bit integral value used to initialize bits [31:24] of the result.\n" |
| 18297 | "/// \\param __b4\n" |
| 18298 | "/// An 8-bit integral value used to initialize bits [39:32] of the result.\n" |
| 18299 | "/// \\param __b5\n" |
| 18300 | "/// An 8-bit integral value used to initialize bits [47:40] of the result.\n" |
| 18301 | "/// \\param __b6\n" |
| 18302 | "/// An 8-bit integral value used to initialize bits [55:48] of the result.\n" |
| 18303 | "/// \\param __b7\n" |
| 18304 | "/// An 8-bit integral value used to initialize bits [63:56] of the result.\n" |
| 18305 | "/// \\param __b8\n" |
| 18306 | "/// An 8-bit integral value used to initialize bits [71:64] of the result.\n" |
| 18307 | "/// \\param __b9\n" |
| 18308 | "/// An 8-bit integral value used to initialize bits [79:72] of the result.\n" |
| 18309 | "/// \\param __b10\n" |
| 18310 | "/// An 8-bit integral value used to initialize bits [87:80] of the result.\n" |
| 18311 | "/// \\param __b11\n" |
| 18312 | "/// An 8-bit integral value used to initialize bits [95:88] of the result.\n" |
| 18313 | "/// \\param __b12\n" |
| 18314 | "/// An 8-bit integral value used to initialize bits [103:96] of the result.\n" |
| 18315 | "/// \\param __b13\n" |
| 18316 | "/// An 8-bit integral value used to initialize bits [111:104] of the result.\n" |
| 18317 | "/// \\param __b14\n" |
| 18318 | "/// An 8-bit integral value used to initialize bits [119:112] of the result.\n" |
| 18319 | "/// \\param __b15\n" |
| 18320 | "/// An 8-bit integral value used to initialize bits [127:120] of the result.\n" |
| 18321 | "/// \\returns An initialized 128-bit integer vector.\n" |
| 18322 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 18323 | "_mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7, char __b8, char __b9, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15)\n" |
| 18324 | "{\n" |
| 18325 | " return _mm_set_epi8(__b15, __b14, __b13, __b12, __b11, __b10, __b9, __b8, __b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);\n" |
| 18326 | "}\n" |
| 18327 | "\n" |
| 18328 | "/// Creates a 128-bit integer vector initialized to zero.\n" |
| 18329 | "///\n" |
| 18330 | "/// \\headerfile <x86intrin.h>\n" |
| 18331 | "///\n" |
| 18332 | "/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction.\n" |
| 18333 | "///\n" |
| 18334 | "/// \\returns An initialized 128-bit integer vector with all elements set to\n" |
| 18335 | "/// zero.\n" |
| 18336 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 18337 | "_mm_setzero_si128(void)\n" |
| 18338 | "{\n" |
| 18339 | " return __extension__ (__m128i)(__v2di){ 0LL, 0LL };\n" |
| 18340 | "}\n" |
| 18341 | "\n" |
| 18342 | "/// Stores a 128-bit integer vector to a memory location aligned on a\n" |
| 18343 | "/// 128-bit boundary.\n" |
| 18344 | "///\n" |
| 18345 | "/// \\headerfile <x86intrin.h>\n" |
| 18346 | "///\n" |
| 18347 | "/// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS </c> instruction.\n" |
| 18348 | "///\n" |
| 18349 | "/// \\param __p\n" |
| 18350 | "/// A pointer to an aligned memory location that will receive the integer\n" |
| 18351 | "/// values.\n" |
| 18352 | "/// \\param __b\n" |
| 18353 | "/// A 128-bit integer vector containing the values to be moved.\n" |
| 18354 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 18355 | "_mm_store_si128(__m128i *__p, __m128i __b)\n" |
| 18356 | "{\n" |
| 18357 | " *__p = __b;\n" |
| 18358 | "}\n" |
| 18359 | "\n" |
| 18360 | "/// Stores a 128-bit integer vector to an unaligned memory location.\n" |
| 18361 | "///\n" |
| 18362 | "/// \\headerfile <x86intrin.h>\n" |
| 18363 | "///\n" |
| 18364 | "/// This intrinsic corresponds to the <c> VMOVUPS / MOVUPS </c> instruction.\n" |
| 18365 | "///\n" |
| 18366 | "/// \\param __p\n" |
| 18367 | "/// A pointer to a memory location that will receive the integer values.\n" |
| 18368 | "/// \\param __b\n" |
| 18369 | "/// A 128-bit integer vector containing the values to be moved.\n" |
| 18370 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 18371 | "_mm_storeu_si128(__m128i *__p, __m128i __b)\n" |
| 18372 | "{\n" |
| 18373 | " struct __storeu_si128 {\n" |
| 18374 | " __m128i __v;\n" |
| 18375 | " } __attribute__((__packed__, __may_alias__));\n" |
| 18376 | " ((struct __storeu_si128*)__p)->__v = __b;\n" |
| 18377 | "}\n" |
| 18378 | "\n" |
| 18379 | "/// Moves bytes selected by the mask from the first operand to the\n" |
| 18380 | "/// specified unaligned memory location. When a mask bit is 1, the\n" |
| 18381 | "/// corresponding byte is written, otherwise it is not written.\n" |
| 18382 | "///\n" |
| 18383 | "/// To minimize caching, the data is flagged as non-temporal (unlikely to be\n" |
| 18384 | "/// used again soon). Exception and trap behavior for elements not selected\n" |
| 18385 | "/// for storage to memory are implementation dependent.\n" |
| 18386 | "///\n" |
| 18387 | "/// \\headerfile <x86intrin.h>\n" |
| 18388 | "///\n" |
| 18389 | "/// This intrinsic corresponds to the <c> VMASKMOVDQU / MASKMOVDQU </c>\n" |
| 18390 | "/// instruction.\n" |
| 18391 | "///\n" |
| 18392 | "/// \\param __d\n" |
| 18393 | "/// A 128-bit integer vector containing the values to be moved.\n" |
| 18394 | "/// \\param __n\n" |
| 18395 | "/// A 128-bit integer vector containing the mask. The most significant bit of\n" |
| 18396 | "/// each byte represents the mask bits.\n" |
| 18397 | "/// \\param __p\n" |
| 18398 | "/// A pointer to an unaligned 128-bit memory location where the specified\n" |
| 18399 | "/// values are moved.\n" |
| 18400 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 18401 | "_mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p)\n" |
| 18402 | "{\n" |
| 18403 | " __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p);\n" |
| 18404 | "}\n" |
| 18405 | "\n" |
| 18406 | "/// Stores the lower 64 bits of a 128-bit integer vector of [2 x i64] to\n" |
| 18407 | "/// a memory location.\n" |
| 18408 | "///\n" |
| 18409 | "/// \\headerfile <x86intrin.h>\n" |
| 18410 | "///\n" |
| 18411 | "/// This intrinsic corresponds to the <c> VMOVLPS / MOVLPS </c> instruction.\n" |
| 18412 | "///\n" |
| 18413 | "/// \\param __p\n" |
| 18414 | "/// A pointer to a 64-bit memory location that will receive the lower 64 bits\n" |
| 18415 | "/// of the integer vector parameter.\n" |
| 18416 | "/// \\param __a\n" |
| 18417 | "/// A 128-bit integer vector of [2 x i64]. The lower 64 bits contain the\n" |
| 18418 | "/// value to be stored.\n" |
| 18419 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 18420 | "_mm_storel_epi64(__m128i *__p, __m128i __a)\n" |
| 18421 | "{\n" |
| 18422 | " struct __mm_storel_epi64_struct {\n" |
| 18423 | " long long __u;\n" |
| 18424 | " } __attribute__((__packed__, __may_alias__));\n" |
| 18425 | " ((struct __mm_storel_epi64_struct*)__p)->__u = __a[0];\n" |
| 18426 | "}\n" |
| 18427 | "\n" |
| 18428 | "/// Stores a 128-bit floating point vector of [2 x double] to a 128-bit\n" |
| 18429 | "/// aligned memory location.\n" |
| 18430 | "///\n" |
| 18431 | "/// To minimize caching, the data is flagged as non-temporal (unlikely to be\n" |
| 18432 | "/// used again soon).\n" |
| 18433 | "///\n" |
| 18434 | "/// \\headerfile <x86intrin.h>\n" |
| 18435 | "///\n" |
| 18436 | "/// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction.\n" |
| 18437 | "///\n" |
| 18438 | "/// \\param __p\n" |
| 18439 | "/// A pointer to the 128-bit aligned memory location used to store the value.\n" |
| 18440 | "/// \\param __a\n" |
| 18441 | "/// A vector of [2 x double] containing the 64-bit values to be stored.\n" |
| 18442 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 18443 | "_mm_stream_pd(double *__p, __m128d __a)\n" |
| 18444 | "{\n" |
| 18445 | " __builtin_nontemporal_store((__v2df)__a, (__v2df*)__p);\n" |
| 18446 | "}\n" |
| 18447 | "\n" |
| 18448 | "/// Stores a 128-bit integer vector to a 128-bit aligned memory location.\n" |
| 18449 | "///\n" |
| 18450 | "/// To minimize caching, the data is flagged as non-temporal (unlikely to be\n" |
| 18451 | "/// used again soon).\n" |
| 18452 | "///\n" |
| 18453 | "/// \\headerfile <x86intrin.h>\n" |
| 18454 | "///\n" |
| 18455 | "/// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction.\n" |
| 18456 | "///\n" |
| 18457 | "/// \\param __p\n" |
| 18458 | "/// A pointer to the 128-bit aligned memory location used to store the value.\n" |
| 18459 | "/// \\param __a\n" |
| 18460 | "/// A 128-bit integer vector containing the values to be stored.\n" |
| 18461 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 18462 | "_mm_stream_si128(__m128i *__p, __m128i __a)\n" |
| 18463 | "{\n" |
| 18464 | " __builtin_nontemporal_store((__v2di)__a, (__v2di*)__p);\n" |
| 18465 | "}\n" |
| 18466 | "\n" |
| 18467 | "/// Stores a 32-bit integer value in the specified memory location.\n" |
| 18468 | "///\n" |
| 18469 | "/// To minimize caching, the data is flagged as non-temporal (unlikely to be\n" |
| 18470 | "/// used again soon).\n" |
| 18471 | "///\n" |
| 18472 | "/// \\headerfile <x86intrin.h>\n" |
| 18473 | "///\n" |
| 18474 | "/// This intrinsic corresponds to the <c> MOVNTI </c> instruction.\n" |
| 18475 | "///\n" |
| 18476 | "/// \\param __p\n" |
| 18477 | "/// A pointer to the 32-bit memory location used to store the value.\n" |
| 18478 | "/// \\param __a\n" |
| 18479 | "/// A 32-bit integer containing the value to be stored.\n" |
| 18480 | "static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"sse2\")))\n" |
| 18481 | "_mm_stream_si32(int *__p, int __a)\n" |
| 18482 | "{\n" |
| 18483 | " __builtin_ia32_movnti(__p, __a);\n" |
| 18484 | "}\n" |
| 18485 | "\n" |
| 18486 | "#ifdef __x86_64__\n" |
| 18487 | "/// Stores a 64-bit integer value in the specified memory location.\n" |
| 18488 | "///\n" |
| 18489 | "/// To minimize caching, the data is flagged as non-temporal (unlikely to be\n" |
| 18490 | "/// used again soon).\n" |
| 18491 | "///\n" |
| 18492 | "/// \\headerfile <x86intrin.h>\n" |
| 18493 | "///\n" |
| 18494 | "/// This intrinsic corresponds to the <c> MOVNTIQ </c> instruction.\n" |
| 18495 | "///\n" |
| 18496 | "/// \\param __p\n" |
| 18497 | "/// A pointer to the 64-bit memory location used to store the value.\n" |
| 18498 | "/// \\param __a\n" |
| 18499 | "/// A 64-bit integer containing the value to be stored.\n" |
| 18500 | "static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"sse2\")))\n" |
| 18501 | "_mm_stream_si64(long long *__p, long long __a)\n" |
| 18502 | "{\n" |
| 18503 | " __builtin_ia32_movnti64(__p, __a);\n" |
| 18504 | "}\n" |
| 18505 | "#endif\n" |
| 18506 | "\n" |
| 18507 | "#if defined(__cplusplus)\n" |
| 18508 | "extern \"C\" {\n" |
| 18509 | "#endif\n" |
| 18510 | "\n" |
| 18511 | "/// The cache line containing \\a __p is flushed and invalidated from all\n" |
| 18512 | "/// caches in the coherency domain.\n" |
| 18513 | "///\n" |
| 18514 | "/// \\headerfile <x86intrin.h>\n" |
| 18515 | "///\n" |
| 18516 | "/// This intrinsic corresponds to the <c> CLFLUSH </c> instruction.\n" |
| 18517 | "///\n" |
| 18518 | "/// \\param __p\n" |
| 18519 | "/// A pointer to the memory location used to identify the cache line to be\n" |
| 18520 | "/// flushed.\n" |
| 18521 | "void _mm_clflush(void const * __p);\n" |
| 18522 | "\n" |
| 18523 | "/// Forces strong memory ordering (serialization) between load\n" |
| 18524 | "/// instructions preceding this instruction and load instructions following\n" |
| 18525 | "/// this instruction, ensuring the system completes all previous loads before\n" |
| 18526 | "/// executing subsequent loads.\n" |
| 18527 | "///\n" |
| 18528 | "/// \\headerfile <x86intrin.h>\n" |
| 18529 | "///\n" |
| 18530 | "/// This intrinsic corresponds to the <c> LFENCE </c> instruction.\n" |
| 18531 | "///\n" |
| 18532 | "void _mm_lfence(void);\n" |
| 18533 | "\n" |
| 18534 | "/// Forces strong memory ordering (serialization) between load and store\n" |
| 18535 | "/// instructions preceding this instruction and load and store instructions\n" |
| 18536 | "/// following this instruction, ensuring that the system completes all\n" |
| 18537 | "/// previous memory accesses before executing subsequent memory accesses.\n" |
| 18538 | "///\n" |
| 18539 | "/// \\headerfile <x86intrin.h>\n" |
| 18540 | "///\n" |
| 18541 | "/// This intrinsic corresponds to the <c> MFENCE </c> instruction.\n" |
| 18542 | "///\n" |
| 18543 | "void _mm_mfence(void);\n" |
| 18544 | "\n" |
| 18545 | "#if defined(__cplusplus)\n" |
| 18546 | "} // extern \"C\"\n" |
| 18547 | "#endif\n" |
| 18548 | "\n" |
| 18549 | "/// Converts 16-bit signed integers from both 128-bit integer vector\n" |
| 18550 | "/// operands into 8-bit signed integers, and packs the results into the\n" |
| 18551 | "/// destination. Positive values greater than 0x7F are saturated to 0x7F.\n" |
| 18552 | "/// Negative values less than 0x80 are saturated to 0x80.\n" |
| 18553 | "///\n" |
| 18554 | "/// \\headerfile <x86intrin.h>\n" |
| 18555 | "///\n" |
| 18556 | "/// This intrinsic corresponds to the <c> VPACKSSWB / PACKSSWB </c> instruction.\n" |
| 18557 | "///\n" |
| 18558 | "/// \\param __a\n" |
| 18559 | "/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as\n" |
| 18560 | "/// a signed integer and is converted to a 8-bit signed integer with\n" |
| 18561 | "/// saturation. Values greater than 0x7F are saturated to 0x7F. Values less\n" |
| 18562 | "/// than 0x80 are saturated to 0x80. The converted [8 x i8] values are\n" |
| 18563 | "/// written to the lower 64 bits of the result.\n" |
| 18564 | "/// \\param __b\n" |
| 18565 | "/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as\n" |
| 18566 | "/// a signed integer and is converted to a 8-bit signed integer with\n" |
| 18567 | "/// saturation. Values greater than 0x7F are saturated to 0x7F. Values less\n" |
| 18568 | "/// than 0x80 are saturated to 0x80. The converted [8 x i8] values are\n" |
| 18569 | "/// written to the higher 64 bits of the result.\n" |
| 18570 | "/// \\returns A 128-bit vector of [16 x i8] containing the converted values.\n" |
| 18571 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 18572 | "_mm_packs_epi16(__m128i __a, __m128i __b)\n" |
| 18573 | "{\n" |
| 18574 | " return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);\n" |
| 18575 | "}\n" |
| 18576 | "\n" |
| 18577 | "/// Converts 32-bit signed integers from both 128-bit integer vector\n" |
| 18578 | "/// operands into 16-bit signed integers, and packs the results into the\n" |
| 18579 | "/// destination. Positive values greater than 0x7FFF are saturated to 0x7FFF.\n" |
| 18580 | "/// Negative values less than 0x8000 are saturated to 0x8000.\n" |
| 18581 | "///\n" |
| 18582 | "/// \\headerfile <x86intrin.h>\n" |
| 18583 | "///\n" |
| 18584 | "/// This intrinsic corresponds to the <c> VPACKSSDW / PACKSSDW </c> instruction.\n" |
| 18585 | "///\n" |
| 18586 | "/// \\param __a\n" |
| 18587 | "/// A 128-bit integer vector of [4 x i32]. Each 32-bit element is treated as\n" |
| 18588 | "/// a signed integer and is converted to a 16-bit signed integer with\n" |
| 18589 | "/// saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values\n" |
| 18590 | "/// less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values\n" |
| 18591 | "/// are written to the lower 64 bits of the result.\n" |
| 18592 | "/// \\param __b\n" |
| 18593 | "/// A 128-bit integer vector of [4 x i32]. Each 32-bit element is treated as\n" |
| 18594 | "/// a signed integer and is converted to a 16-bit signed integer with\n" |
| 18595 | "/// saturation. Values greater than 0x7FFF are saturated to 0x7FFF. Values\n" |
| 18596 | "/// less than 0x8000 are saturated to 0x8000. The converted [4 x i16] values\n" |
| 18597 | "/// are written to the higher 64 bits of the result.\n" |
| 18598 | "/// \\returns A 128-bit vector of [8 x i16] containing the converted values.\n" |
| 18599 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 18600 | "_mm_packs_epi32(__m128i __a, __m128i __b)\n" |
| 18601 | "{\n" |
| 18602 | " return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);\n" |
| 18603 | "}\n" |
| 18604 | "\n" |
| 18605 | "/// Converts 16-bit signed integers from both 128-bit integer vector\n" |
| 18606 | "/// operands into 8-bit unsigned integers, and packs the results into the\n" |
| 18607 | "/// destination. Values greater than 0xFF are saturated to 0xFF. Values less\n" |
| 18608 | "/// than 0x00 are saturated to 0x00.\n" |
| 18609 | "///\n" |
| 18610 | "/// \\headerfile <x86intrin.h>\n" |
| 18611 | "///\n" |
| 18612 | "/// This intrinsic corresponds to the <c> VPACKUSWB / PACKUSWB </c> instruction.\n" |
| 18613 | "///\n" |
| 18614 | "/// \\param __a\n" |
| 18615 | "/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as\n" |
| 18616 | "/// a signed integer and is converted to an 8-bit unsigned integer with\n" |
| 18617 | "/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less\n" |
| 18618 | "/// than 0x00 are saturated to 0x00. The converted [8 x i8] values are\n" |
| 18619 | "/// written to the lower 64 bits of the result.\n" |
| 18620 | "/// \\param __b\n" |
| 18621 | "/// A 128-bit integer vector of [8 x i16]. Each 16-bit element is treated as\n" |
| 18622 | "/// a signed integer and is converted to an 8-bit unsigned integer with\n" |
| 18623 | "/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less\n" |
| 18624 | "/// than 0x00 are saturated to 0x00. The converted [8 x i8] values are\n" |
| 18625 | "/// written to the higher 64 bits of the result.\n" |
| 18626 | "/// \\returns A 128-bit vector of [16 x i8] containing the converted values.\n" |
| 18627 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 18628 | "_mm_packus_epi16(__m128i __a, __m128i __b)\n" |
| 18629 | "{\n" |
| 18630 | " return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b);\n" |
| 18631 | "}\n" |
| 18632 | "\n" |
| 18633 | "/// Extracts 16 bits from a 128-bit integer vector of [8 x i16], using\n" |
| 18634 | "/// the immediate-value parameter as a selector.\n" |
| 18635 | "///\n" |
| 18636 | "/// \\headerfile <x86intrin.h>\n" |
| 18637 | "///\n" |
| 18638 | "/// This intrinsic corresponds to the <c> VPEXTRW / PEXTRW </c> instruction.\n" |
| 18639 | "///\n" |
| 18640 | "/// \\param __a\n" |
| 18641 | "/// A 128-bit integer vector.\n" |
| 18642 | "/// \\param __imm\n" |
| 18643 | "/// An immediate value. Bits [2:0] selects values from \\a __a to be assigned\n" |
| 18644 | "/// to bits[15:0] of the result. \\n\n" |
| 18645 | "/// 000: assign values from bits [15:0] of \\a __a. \\n\n" |
| 18646 | "/// 001: assign values from bits [31:16] of \\a __a. \\n\n" |
| 18647 | "/// 010: assign values from bits [47:32] of \\a __a. \\n\n" |
| 18648 | "/// 011: assign values from bits [63:48] of \\a __a. \\n\n" |
| 18649 | "/// 100: assign values from bits [79:64] of \\a __a. \\n\n" |
| 18650 | "/// 101: assign values from bits [95:80] of \\a __a. \\n\n" |
| 18651 | "/// 110: assign values from bits [111:96] of \\a __a. \\n\n" |
| 18652 | "/// 111: assign values from bits [127:112] of \\a __a.\n" |
| 18653 | "/// \\returns An integer, whose lower 16 bits are selected from the 128-bit\n" |
| 18654 | "/// integer vector parameter and the remaining bits are assigned zeros.\n" |
| 18655 | "#define _mm_extract_epi16(a, imm) \\\n" |
| 18656 | " (int)(unsigned short)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(a), \\\n" |
| 18657 | " (int)(imm))\n" |
| 18658 | "\n" |
| 18659 | "/// Constructs a 128-bit integer vector by first making a copy of the\n" |
| 18660 | "/// 128-bit integer vector parameter, and then inserting the lower 16 bits\n" |
| 18661 | "/// of an integer parameter into an offset specified by the immediate-value\n" |
| 18662 | "/// parameter.\n" |
| 18663 | "///\n" |
| 18664 | "/// \\headerfile <x86intrin.h>\n" |
| 18665 | "///\n" |
| 18666 | "/// This intrinsic corresponds to the <c> VPINSRW / PINSRW </c> instruction.\n" |
| 18667 | "///\n" |
| 18668 | "/// \\param __a\n" |
| 18669 | "/// A 128-bit integer vector of [8 x i16]. This vector is copied to the\n" |
| 18670 | "/// result and then one of the eight elements in the result is replaced by\n" |
| 18671 | "/// the lower 16 bits of \\a __b.\n" |
| 18672 | "/// \\param __b\n" |
| 18673 | "/// An integer. The lower 16 bits of this parameter are written to the\n" |
| 18674 | "/// result beginning at an offset specified by \\a __imm.\n" |
| 18675 | "/// \\param __imm\n" |
| 18676 | "/// An immediate value specifying the bit offset in the result at which the\n" |
| 18677 | "/// lower 16 bits of \\a __b are written.\n" |
| 18678 | "/// \\returns A 128-bit integer vector containing the constructed values.\n" |
| 18679 | "#define _mm_insert_epi16(a, b, imm) \\\n" |
| 18680 | " (__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \\\n" |
| 18681 | " (int)(imm))\n" |
| 18682 | "\n" |
| 18683 | "/// Copies the values of the most significant bits from each 8-bit\n" |
| 18684 | "/// element in a 128-bit integer vector of [16 x i8] to create a 16-bit mask\n" |
| 18685 | "/// value, zero-extends the value, and writes it to the destination.\n" |
| 18686 | "///\n" |
| 18687 | "/// \\headerfile <x86intrin.h>\n" |
| 18688 | "///\n" |
| 18689 | "/// This intrinsic corresponds to the <c> VPMOVMSKB / PMOVMSKB </c> instruction.\n" |
| 18690 | "///\n" |
| 18691 | "/// \\param __a\n" |
| 18692 | "/// A 128-bit integer vector containing the values with bits to be extracted.\n" |
| 18693 | "/// \\returns The most significant bits from each 8-bit element in \\a __a,\n" |
| 18694 | "/// written to bits [15:0]. The other bits are assigned zeros.\n" |
| 18695 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 18696 | "_mm_movemask_epi8(__m128i __a)\n" |
| 18697 | "{\n" |
| 18698 | " return __builtin_ia32_pmovmskb128((__v16qi)__a);\n" |
| 18699 | "}\n" |
| 18700 | "\n" |
| 18701 | "/// Constructs a 128-bit integer vector by shuffling four 32-bit\n" |
| 18702 | "/// elements of a 128-bit integer vector parameter, using the immediate-value\n" |
| 18703 | "/// parameter as a specifier.\n" |
| 18704 | "///\n" |
| 18705 | "/// \\headerfile <x86intrin.h>\n" |
| 18706 | "///\n" |
| 18707 | "/// \\code\n" |
| 18708 | "/// __m128i _mm_shuffle_epi32(__m128i a, const int imm);\n" |
| 18709 | "/// \\endcode\n" |
| 18710 | "///\n" |
| 18711 | "/// This intrinsic corresponds to the <c> VPSHUFD / PSHUFD </c> instruction.\n" |
| 18712 | "///\n" |
| 18713 | "/// \\param a\n" |
| 18714 | "/// A 128-bit integer vector containing the values to be copied.\n" |
| 18715 | "/// \\param imm\n" |
| 18716 | "/// An immediate value containing an 8-bit value specifying which elements to\n" |
| 18717 | "/// copy from a. The destinations within the 128-bit destination are assigned\n" |
| 18718 | "/// values as follows: \\n\n" |
| 18719 | "/// Bits [1:0] are used to assign values to bits [31:0] of the result. \\n\n" |
| 18720 | "/// Bits [3:2] are used to assign values to bits [63:32] of the result. \\n\n" |
| 18721 | "/// Bits [5:4] are used to assign values to bits [95:64] of the result. \\n\n" |
| 18722 | "/// Bits [7:6] are used to assign values to bits [127:96] of the result. \\n\n" |
| 18723 | "/// Bit value assignments: \\n\n" |
| 18724 | "/// 00: assign values from bits [31:0] of \\a a. \\n\n" |
| 18725 | "/// 01: assign values from bits [63:32] of \\a a. \\n\n" |
| 18726 | "/// 10: assign values from bits [95:64] of \\a a. \\n\n" |
| 18727 | "/// 11: assign values from bits [127:96] of \\a a.\n" |
| 18728 | "/// \\returns A 128-bit integer vector containing the shuffled values.\n" |
| 18729 | "#define _mm_shuffle_epi32(a, imm) \\\n" |
| 18730 | " (__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm))\n" |
| 18731 | "\n" |
| 18732 | "/// Constructs a 128-bit integer vector by shuffling four lower 16-bit\n" |
| 18733 | "/// elements of a 128-bit integer vector of [8 x i16], using the immediate\n" |
| 18734 | "/// value parameter as a specifier.\n" |
| 18735 | "///\n" |
| 18736 | "/// \\headerfile <x86intrin.h>\n" |
| 18737 | "///\n" |
| 18738 | "/// \\code\n" |
| 18739 | "/// __m128i _mm_shufflelo_epi16(__m128i a, const int imm);\n" |
| 18740 | "/// \\endcode\n" |
| 18741 | "///\n" |
| 18742 | "/// This intrinsic corresponds to the <c> VPSHUFLW / PSHUFLW </c> instruction.\n" |
| 18743 | "///\n" |
| 18744 | "/// \\param a\n" |
| 18745 | "/// A 128-bit integer vector of [8 x i16]. Bits [127:64] are copied to bits\n" |
| 18746 | "/// [127:64] of the result.\n" |
| 18747 | "/// \\param imm\n" |
| 18748 | "/// An 8-bit immediate value specifying which elements to copy from \\a a. \\n\n" |
| 18749 | "/// Bits[1:0] are used to assign values to bits [15:0] of the result. \\n\n" |
| 18750 | "/// Bits[3:2] are used to assign values to bits [31:16] of the result. \\n\n" |
| 18751 | "/// Bits[5:4] are used to assign values to bits [47:32] of the result. \\n\n" |
| 18752 | "/// Bits[7:6] are used to assign values to bits [63:48] of the result. \\n\n" |
| 18753 | "/// Bit value assignments: \\n\n" |
| 18754 | "/// 00: assign values from bits [15:0] of \\a a. \\n\n" |
| 18755 | "/// 01: assign values from bits [31:16] of \\a a. \\n\n" |
| 18756 | "/// 10: assign values from bits [47:32] of \\a a. \\n\n" |
| 18757 | "/// 11: assign values from bits [63:48] of \\a a. \\n\n" |
| 18758 | "/// \\returns A 128-bit integer vector containing the shuffled values.\n" |
| 18759 | "#define _mm_shufflelo_epi16(a, imm) \\\n" |
| 18760 | " (__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm))\n" |
| 18761 | "\n" |
| 18762 | "/// Constructs a 128-bit integer vector by shuffling four upper 16-bit\n" |
| 18763 | "/// elements of a 128-bit integer vector of [8 x i16], using the immediate\n" |
| 18764 | "/// value parameter as a specifier.\n" |
| 18765 | "///\n" |
| 18766 | "/// \\headerfile <x86intrin.h>\n" |
| 18767 | "///\n" |
| 18768 | "/// \\code\n" |
| 18769 | "/// __m128i _mm_shufflehi_epi16(__m128i a, const int imm);\n" |
| 18770 | "/// \\endcode\n" |
| 18771 | "///\n" |
| 18772 | "/// This intrinsic corresponds to the <c> VPSHUFHW / PSHUFHW </c> instruction.\n" |
| 18773 | "///\n" |
| 18774 | "/// \\param a\n" |
| 18775 | "/// A 128-bit integer vector of [8 x i16]. Bits [63:0] are copied to bits\n" |
| 18776 | "/// [63:0] of the result.\n" |
| 18777 | "/// \\param imm\n" |
| 18778 | "/// An 8-bit immediate value specifying which elements to copy from \\a a. \\n\n" |
| 18779 | "/// Bits[1:0] are used to assign values to bits [79:64] of the result. \\n\n" |
| 18780 | "/// Bits[3:2] are used to assign values to bits [95:80] of the result. \\n\n" |
| 18781 | "/// Bits[5:4] are used to assign values to bits [111:96] of the result. \\n\n" |
| 18782 | "/// Bits[7:6] are used to assign values to bits [127:112] of the result. \\n\n" |
| 18783 | "/// Bit value assignments: \\n\n" |
| 18784 | "/// 00: assign values from bits [79:64] of \\a a. \\n\n" |
| 18785 | "/// 01: assign values from bits [95:80] of \\a a. \\n\n" |
| 18786 | "/// 10: assign values from bits [111:96] of \\a a. \\n\n" |
| 18787 | "/// 11: assign values from bits [127:112] of \\a a. \\n\n" |
| 18788 | "/// \\returns A 128-bit integer vector containing the shuffled values.\n" |
| 18789 | "#define _mm_shufflehi_epi16(a, imm) \\\n" |
| 18790 | " (__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm))\n" |
| 18791 | "\n" |
| 18792 | "/// Unpacks the high-order (index 8-15) values from two 128-bit vectors\n" |
| 18793 | "/// of [16 x i8] and interleaves them into a 128-bit vector of [16 x i8].\n" |
| 18794 | "///\n" |
| 18795 | "/// \\headerfile <x86intrin.h>\n" |
| 18796 | "///\n" |
| 18797 | "/// This intrinsic corresponds to the <c> VPUNPCKHBW / PUNPCKHBW </c>\n" |
| 18798 | "/// instruction.\n" |
| 18799 | "///\n" |
| 18800 | "/// \\param __a\n" |
| 18801 | "/// A 128-bit vector of [16 x i8].\n" |
| 18802 | "/// Bits [71:64] are written to bits [7:0] of the result. \\n\n" |
| 18803 | "/// Bits [79:72] are written to bits [23:16] of the result. \\n\n" |
| 18804 | "/// Bits [87:80] are written to bits [39:32] of the result. \\n\n" |
| 18805 | "/// Bits [95:88] are written to bits [55:48] of the result. \\n\n" |
| 18806 | "/// Bits [103:96] are written to bits [71:64] of the result. \\n\n" |
| 18807 | "/// Bits [111:104] are written to bits [87:80] of the result. \\n\n" |
| 18808 | "/// Bits [119:112] are written to bits [103:96] of the result. \\n\n" |
| 18809 | "/// Bits [127:120] are written to bits [119:112] of the result.\n" |
| 18810 | "/// \\param __b\n" |
| 18811 | "/// A 128-bit vector of [16 x i8]. \\n\n" |
| 18812 | "/// Bits [71:64] are written to bits [15:8] of the result. \\n\n" |
| 18813 | "/// Bits [79:72] are written to bits [31:24] of the result. \\n\n" |
| 18814 | "/// Bits [87:80] are written to bits [47:40] of the result. \\n\n" |
| 18815 | "/// Bits [95:88] are written to bits [63:56] of the result. \\n\n" |
| 18816 | "/// Bits [103:96] are written to bits [79:72] of the result. \\n\n" |
| 18817 | "/// Bits [111:104] are written to bits [95:88] of the result. \\n\n" |
| 18818 | "/// Bits [119:112] are written to bits [111:104] of the result. \\n\n" |
| 18819 | "/// Bits [127:120] are written to bits [127:120] of the result.\n" |
| 18820 | "/// \\returns A 128-bit vector of [16 x i8] containing the interleaved values.\n" |
| 18821 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 18822 | "_mm_unpackhi_epi8(__m128i __a, __m128i __b)\n" |
| 18823 | "{\n" |
| 18824 | " return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);\n" |
| 18825 | "}\n" |
| 18826 | "\n" |
| 18827 | "/// Unpacks the high-order (index 4-7) values from two 128-bit vectors of\n" |
| 18828 | "/// [8 x i16] and interleaves them into a 128-bit vector of [8 x i16].\n" |
| 18829 | "///\n" |
| 18830 | "/// \\headerfile <x86intrin.h>\n" |
| 18831 | "///\n" |
| 18832 | "/// This intrinsic corresponds to the <c> VPUNPCKHWD / PUNPCKHWD </c>\n" |
| 18833 | "/// instruction.\n" |
| 18834 | "///\n" |
| 18835 | "/// \\param __a\n" |
| 18836 | "/// A 128-bit vector of [8 x i16].\n" |
| 18837 | "/// Bits [79:64] are written to bits [15:0] of the result. \\n\n" |
| 18838 | "/// Bits [95:80] are written to bits [47:32] of the result. \\n\n" |
| 18839 | "/// Bits [111:96] are written to bits [79:64] of the result. \\n\n" |
| 18840 | "/// Bits [127:112] are written to bits [111:96] of the result.\n" |
| 18841 | "/// \\param __b\n" |
| 18842 | "/// A 128-bit vector of [8 x i16].\n" |
| 18843 | "/// Bits [79:64] are written to bits [31:16] of the result. \\n\n" |
| 18844 | "/// Bits [95:80] are written to bits [63:48] of the result. \\n\n" |
| 18845 | "/// Bits [111:96] are written to bits [95:80] of the result. \\n\n" |
| 18846 | "/// Bits [127:112] are written to bits [127:112] of the result.\n" |
| 18847 | "/// \\returns A 128-bit vector of [8 x i16] containing the interleaved values.\n" |
| 18848 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 18849 | "_mm_unpackhi_epi16(__m128i __a, __m128i __b)\n" |
| 18850 | "{\n" |
| 18851 | " return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7);\n" |
| 18852 | "}\n" |
| 18853 | "\n" |
| 18854 | "/// Unpacks the high-order (index 2,3) values from two 128-bit vectors of\n" |
| 18855 | "/// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32].\n" |
| 18856 | "///\n" |
| 18857 | "/// \\headerfile <x86intrin.h>\n" |
| 18858 | "///\n" |
| 18859 | "/// This intrinsic corresponds to the <c> VPUNPCKHDQ / PUNPCKHDQ </c>\n" |
| 18860 | "/// instruction.\n" |
| 18861 | "///\n" |
| 18862 | "/// \\param __a\n" |
| 18863 | "/// A 128-bit vector of [4 x i32]. \\n\n" |
| 18864 | "/// Bits [95:64] are written to bits [31:0] of the destination. \\n\n" |
| 18865 | "/// Bits [127:96] are written to bits [95:64] of the destination.\n" |
| 18866 | "/// \\param __b\n" |
| 18867 | "/// A 128-bit vector of [4 x i32]. \\n\n" |
| 18868 | "/// Bits [95:64] are written to bits [64:32] of the destination. \\n\n" |
| 18869 | "/// Bits [127:96] are written to bits [127:96] of the destination.\n" |
| 18870 | "/// \\returns A 128-bit vector of [4 x i32] containing the interleaved values.\n" |
| 18871 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 18872 | "_mm_unpackhi_epi32(__m128i __a, __m128i __b)\n" |
| 18873 | "{\n" |
| 18874 | " return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3);\n" |
| 18875 | "}\n" |
| 18876 | "\n" |
| 18877 | "/// Unpacks the high-order 64-bit elements from two 128-bit vectors of\n" |
| 18878 | "/// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64].\n" |
| 18879 | "///\n" |
| 18880 | "/// \\headerfile <x86intrin.h>\n" |
| 18881 | "///\n" |
| 18882 | "/// This intrinsic corresponds to the <c> VPUNPCKHQDQ / PUNPCKHQDQ </c>\n" |
| 18883 | "/// instruction.\n" |
| 18884 | "///\n" |
| 18885 | "/// \\param __a\n" |
| 18886 | "/// A 128-bit vector of [2 x i64]. \\n\n" |
| 18887 | "/// Bits [127:64] are written to bits [63:0] of the destination.\n" |
| 18888 | "/// \\param __b\n" |
| 18889 | "/// A 128-bit vector of [2 x i64]. \\n\n" |
| 18890 | "/// Bits [127:64] are written to bits [127:64] of the destination.\n" |
| 18891 | "/// \\returns A 128-bit vector of [2 x i64] containing the interleaved values.\n" |
| 18892 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 18893 | "_mm_unpackhi_epi64(__m128i __a, __m128i __b)\n" |
| 18894 | "{\n" |
| 18895 | " return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 1, 2+1);\n" |
| 18896 | "}\n" |
| 18897 | "\n" |
| 18898 | "/// Unpacks the low-order (index 0-7) values from two 128-bit vectors of\n" |
| 18899 | "/// [16 x i8] and interleaves them into a 128-bit vector of [16 x i8].\n" |
| 18900 | "///\n" |
| 18901 | "/// \\headerfile <x86intrin.h>\n" |
| 18902 | "///\n" |
| 18903 | "/// This intrinsic corresponds to the <c> VPUNPCKLBW / PUNPCKLBW </c>\n" |
| 18904 | "/// instruction.\n" |
| 18905 | "///\n" |
| 18906 | "/// \\param __a\n" |
| 18907 | "/// A 128-bit vector of [16 x i8]. \\n\n" |
| 18908 | "/// Bits [7:0] are written to bits [7:0] of the result. \\n\n" |
| 18909 | "/// Bits [15:8] are written to bits [23:16] of the result. \\n\n" |
| 18910 | "/// Bits [23:16] are written to bits [39:32] of the result. \\n\n" |
| 18911 | "/// Bits [31:24] are written to bits [55:48] of the result. \\n\n" |
| 18912 | "/// Bits [39:32] are written to bits [71:64] of the result. \\n\n" |
| 18913 | "/// Bits [47:40] are written to bits [87:80] of the result. \\n\n" |
| 18914 | "/// Bits [55:48] are written to bits [103:96] of the result. \\n\n" |
| 18915 | "/// Bits [63:56] are written to bits [119:112] of the result.\n" |
| 18916 | "/// \\param __b\n" |
| 18917 | "/// A 128-bit vector of [16 x i8].\n" |
| 18918 | "/// Bits [7:0] are written to bits [15:8] of the result. \\n\n" |
| 18919 | "/// Bits [15:8] are written to bits [31:24] of the result. \\n\n" |
| 18920 | "/// Bits [23:16] are written to bits [47:40] of the result. \\n\n" |
| 18921 | "/// Bits [31:24] are written to bits [63:56] of the result. \\n\n" |
| 18922 | "/// Bits [39:32] are written to bits [79:72] of the result. \\n\n" |
| 18923 | "/// Bits [47:40] are written to bits [95:88] of the result. \\n\n" |
| 18924 | "/// Bits [55:48] are written to bits [111:104] of the result. \\n\n" |
| 18925 | "/// Bits [63:56] are written to bits [127:120] of the result.\n" |
| 18926 | "/// \\returns A 128-bit vector of [16 x i8] containing the interleaved values.\n" |
| 18927 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 18928 | "_mm_unpacklo_epi8(__m128i __a, __m128i __b)\n" |
| 18929 | "{\n" |
| 18930 | " return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);\n" |
| 18931 | "}\n" |
| 18932 | "\n" |
| 18933 | "/// Unpacks the low-order (index 0-3) values from each of the two 128-bit\n" |
| 18934 | "/// vectors of [8 x i16] and interleaves them into a 128-bit vector of\n" |
| 18935 | "/// [8 x i16].\n" |
| 18936 | "///\n" |
| 18937 | "/// \\headerfile <x86intrin.h>\n" |
| 18938 | "///\n" |
| 18939 | "/// This intrinsic corresponds to the <c> VPUNPCKLWD / PUNPCKLWD </c>\n" |
| 18940 | "/// instruction.\n" |
| 18941 | "///\n" |
| 18942 | "/// \\param __a\n" |
| 18943 | "/// A 128-bit vector of [8 x i16].\n" |
| 18944 | "/// Bits [15:0] are written to bits [15:0] of the result. \\n\n" |
| 18945 | "/// Bits [31:16] are written to bits [47:32] of the result. \\n\n" |
| 18946 | "/// Bits [47:32] are written to bits [79:64] of the result. \\n\n" |
| 18947 | "/// Bits [63:48] are written to bits [111:96] of the result.\n" |
| 18948 | "/// \\param __b\n" |
| 18949 | "/// A 128-bit vector of [8 x i16].\n" |
| 18950 | "/// Bits [15:0] are written to bits [31:16] of the result. \\n\n" |
| 18951 | "/// Bits [31:16] are written to bits [63:48] of the result. \\n\n" |
| 18952 | "/// Bits [47:32] are written to bits [95:80] of the result. \\n\n" |
| 18953 | "/// Bits [63:48] are written to bits [127:112] of the result.\n" |
| 18954 | "/// \\returns A 128-bit vector of [8 x i16] containing the interleaved values.\n" |
| 18955 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 18956 | "_mm_unpacklo_epi16(__m128i __a, __m128i __b)\n" |
| 18957 | "{\n" |
| 18958 | " return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3);\n" |
| 18959 | "}\n" |
| 18960 | "\n" |
| 18961 | "/// Unpacks the low-order (index 0,1) values from two 128-bit vectors of\n" |
| 18962 | "/// [4 x i32] and interleaves them into a 128-bit vector of [4 x i32].\n" |
| 18963 | "///\n" |
| 18964 | "/// \\headerfile <x86intrin.h>\n" |
| 18965 | "///\n" |
| 18966 | "/// This intrinsic corresponds to the <c> VPUNPCKLDQ / PUNPCKLDQ </c>\n" |
| 18967 | "/// instruction.\n" |
| 18968 | "///\n" |
| 18969 | "/// \\param __a\n" |
| 18970 | "/// A 128-bit vector of [4 x i32]. \\n\n" |
| 18971 | "/// Bits [31:0] are written to bits [31:0] of the destination. \\n\n" |
| 18972 | "/// Bits [63:32] are written to bits [95:64] of the destination.\n" |
| 18973 | "/// \\param __b\n" |
| 18974 | "/// A 128-bit vector of [4 x i32]. \\n\n" |
| 18975 | "/// Bits [31:0] are written to bits [64:32] of the destination. \\n\n" |
| 18976 | "/// Bits [63:32] are written to bits [127:96] of the destination.\n" |
| 18977 | "/// \\returns A 128-bit vector of [4 x i32] containing the interleaved values.\n" |
| 18978 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 18979 | "_mm_unpacklo_epi32(__m128i __a, __m128i __b)\n" |
| 18980 | "{\n" |
| 18981 | " return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4+1);\n" |
| 18982 | "}\n" |
| 18983 | "\n" |
| 18984 | "/// Unpacks the low-order 64-bit elements from two 128-bit vectors of\n" |
| 18985 | "/// [2 x i64] and interleaves them into a 128-bit vector of [2 x i64].\n" |
| 18986 | "///\n" |
| 18987 | "/// \\headerfile <x86intrin.h>\n" |
| 18988 | "///\n" |
| 18989 | "/// This intrinsic corresponds to the <c> VPUNPCKLQDQ / PUNPCKLQDQ </c>\n" |
| 18990 | "/// instruction.\n" |
| 18991 | "///\n" |
| 18992 | "/// \\param __a\n" |
| 18993 | "/// A 128-bit vector of [2 x i64]. \\n\n" |
| 18994 | "/// Bits [63:0] are written to bits [63:0] of the destination. \\n\n" |
| 18995 | "/// \\param __b\n" |
| 18996 | "/// A 128-bit vector of [2 x i64]. \\n\n" |
| 18997 | "/// Bits [63:0] are written to bits [127:64] of the destination. \\n\n" |
| 18998 | "/// \\returns A 128-bit vector of [2 x i64] containing the interleaved values.\n" |
| 18999 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 19000 | "_mm_unpacklo_epi64(__m128i __a, __m128i __b)\n" |
| 19001 | "{\n" |
| 19002 | " return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 0, 2+0);\n" |
| 19003 | "}\n" |
| 19004 | "\n" |
| 19005 | "/// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit\n" |
| 19006 | "/// integer.\n" |
| 19007 | "///\n" |
| 19008 | "/// \\headerfile <x86intrin.h>\n" |
| 19009 | "///\n" |
| 19010 | "/// This intrinsic corresponds to the <c> MOVDQ2Q </c> instruction.\n" |
| 19011 | "///\n" |
| 19012 | "/// \\param __a\n" |
| 19013 | "/// A 128-bit integer vector operand. The lower 64 bits are moved to the\n" |
| 19014 | "/// destination.\n" |
| 19015 | "/// \\returns A 64-bit integer containing the lower 64 bits of the parameter.\n" |
| 19016 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 19017 | "_mm_movepi64_pi64(__m128i __a)\n" |
| 19018 | "{\n" |
| 19019 | " return (__m64)__a[0];\n" |
| 19020 | "}\n" |
| 19021 | "\n" |
| 19022 | "/// Moves the 64-bit operand to a 128-bit integer vector, zeroing the\n" |
| 19023 | "/// upper bits.\n" |
| 19024 | "///\n" |
| 19025 | "/// \\headerfile <x86intrin.h>\n" |
| 19026 | "///\n" |
| 19027 | "/// This intrinsic corresponds to the <c> MOVD+VMOVQ </c> instruction.\n" |
| 19028 | "///\n" |
| 19029 | "/// \\param __a\n" |
| 19030 | "/// A 64-bit value.\n" |
| 19031 | "/// \\returns A 128-bit integer vector. The lower 64 bits contain the value from\n" |
| 19032 | "/// the operand. The upper 64 bits are assigned zeros.\n" |
| 19033 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 19034 | "_mm_movpi64_epi64(__m64 __a)\n" |
| 19035 | "{\n" |
| 19036 | " return __extension__ (__m128i)(__v2di){ (long long)__a, 0 };\n" |
| 19037 | "}\n" |
| 19038 | "\n" |
| 19039 | "/// Moves the lower 64 bits of a 128-bit integer vector to a 128-bit\n" |
| 19040 | "/// integer vector, zeroing the upper bits.\n" |
| 19041 | "///\n" |
| 19042 | "/// \\headerfile <x86intrin.h>\n" |
| 19043 | "///\n" |
| 19044 | "/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.\n" |
| 19045 | "///\n" |
| 19046 | "/// \\param __a\n" |
| 19047 | "/// A 128-bit integer vector operand. The lower 64 bits are moved to the\n" |
| 19048 | "/// destination.\n" |
| 19049 | "/// \\returns A 128-bit integer vector. The lower 64 bits contain the value from\n" |
| 19050 | "/// the operand. The upper 64 bits are assigned zeros.\n" |
| 19051 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 19052 | "_mm_move_epi64(__m128i __a)\n" |
| 19053 | "{\n" |
| 19054 | " return __builtin_shufflevector((__v2di)__a, _mm_setzero_si128(), 0, 2);\n" |
| 19055 | "}\n" |
| 19056 | "\n" |
| 19057 | "/// Unpacks the high-order 64-bit elements from two 128-bit vectors of\n" |
| 19058 | "/// [2 x double] and interleaves them into a 128-bit vector of [2 x\n" |
| 19059 | "/// double].\n" |
| 19060 | "///\n" |
| 19061 | "/// \\headerfile <x86intrin.h>\n" |
| 19062 | "///\n" |
| 19063 | "/// This intrinsic corresponds to the <c> VUNPCKHPD / UNPCKHPD </c> instruction.\n" |
| 19064 | "///\n" |
| 19065 | "/// \\param __a\n" |
| 19066 | "/// A 128-bit vector of [2 x double]. \\n\n" |
| 19067 | "/// Bits [127:64] are written to bits [63:0] of the destination.\n" |
| 19068 | "/// \\param __b\n" |
| 19069 | "/// A 128-bit vector of [2 x double]. \\n\n" |
| 19070 | "/// Bits [127:64] are written to bits [127:64] of the destination.\n" |
| 19071 | "/// \\returns A 128-bit vector of [2 x double] containing the interleaved values.\n" |
| 19072 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 19073 | "_mm_unpackhi_pd(__m128d __a, __m128d __b)\n" |
| 19074 | "{\n" |
| 19075 | " return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2+1);\n" |
| 19076 | "}\n" |
| 19077 | "\n" |
| 19078 | "/// Unpacks the low-order 64-bit elements from two 128-bit vectors\n" |
| 19079 | "/// of [2 x double] and interleaves them into a 128-bit vector of [2 x\n" |
| 19080 | "/// double].\n" |
| 19081 | "///\n" |
| 19082 | "/// \\headerfile <x86intrin.h>\n" |
| 19083 | "///\n" |
| 19084 | "/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.\n" |
| 19085 | "///\n" |
| 19086 | "/// \\param __a\n" |
| 19087 | "/// A 128-bit vector of [2 x double]. \\n\n" |
| 19088 | "/// Bits [63:0] are written to bits [63:0] of the destination.\n" |
| 19089 | "/// \\param __b\n" |
| 19090 | "/// A 128-bit vector of [2 x double]. \\n\n" |
| 19091 | "/// Bits [63:0] are written to bits [127:64] of the destination.\n" |
| 19092 | "/// \\returns A 128-bit vector of [2 x double] containing the interleaved values.\n" |
| 19093 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 19094 | "_mm_unpacklo_pd(__m128d __a, __m128d __b)\n" |
| 19095 | "{\n" |
| 19096 | " return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 0, 2+0);\n" |
| 19097 | "}\n" |
| 19098 | "\n" |
| 19099 | "/// Extracts the sign bits of the double-precision values in the 128-bit\n" |
| 19100 | "/// vector of [2 x double], zero-extends the value, and writes it to the\n" |
| 19101 | "/// low-order bits of the destination.\n" |
| 19102 | "///\n" |
| 19103 | "/// \\headerfile <x86intrin.h>\n" |
| 19104 | "///\n" |
| 19105 | "/// This intrinsic corresponds to the <c> VMOVMSKPD / MOVMSKPD </c> instruction.\n" |
| 19106 | "///\n" |
| 19107 | "/// \\param __a\n" |
| 19108 | "/// A 128-bit vector of [2 x double] containing the values with sign bits to\n" |
| 19109 | "/// be extracted.\n" |
| 19110 | "/// \\returns The sign bits from each of the double-precision elements in \\a __a,\n" |
| 19111 | "/// written to bits [1:0]. The remaining bits are assigned values of zero.\n" |
| 19112 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 19113 | "_mm_movemask_pd(__m128d __a)\n" |
| 19114 | "{\n" |
| 19115 | " return __builtin_ia32_movmskpd((__v2df)__a);\n" |
| 19116 | "}\n" |
| 19117 | "\n" |
| 19118 | "\n" |
| 19119 | "/// Constructs a 128-bit floating-point vector of [2 x double] from two\n" |
| 19120 | "/// 128-bit vector parameters of [2 x double], using the immediate-value\n" |
| 19121 | "/// parameter as a specifier.\n" |
| 19122 | "///\n" |
| 19123 | "/// \\headerfile <x86intrin.h>\n" |
| 19124 | "///\n" |
| 19125 | "/// \\code\n" |
| 19126 | "/// __m128d _mm_shuffle_pd(__m128d a, __m128d b, const int i);\n" |
| 19127 | "/// \\endcode\n" |
| 19128 | "///\n" |
| 19129 | "/// This intrinsic corresponds to the <c> VSHUFPD / SHUFPD </c> instruction.\n" |
| 19130 | "///\n" |
| 19131 | "/// \\param a\n" |
| 19132 | "/// A 128-bit vector of [2 x double].\n" |
| 19133 | "/// \\param b\n" |
| 19134 | "/// A 128-bit vector of [2 x double].\n" |
| 19135 | "/// \\param i\n" |
| 19136 | "/// An 8-bit immediate value. The least significant two bits specify which\n" |
| 19137 | "/// elements to copy from \\a a and \\a b: \\n\n" |
| 19138 | "/// Bit[0] = 0: lower element of \\a a copied to lower element of result. \\n\n" |
| 19139 | "/// Bit[0] = 1: upper element of \\a a copied to lower element of result. \\n\n" |
| 19140 | "/// Bit[1] = 0: lower element of \\a b copied to upper element of result. \\n\n" |
| 19141 | "/// Bit[1] = 1: upper element of \\a b copied to upper element of result. \\n\n" |
| 19142 | "/// \\returns A 128-bit vector of [2 x double] containing the shuffled values.\n" |
| 19143 | "#define _mm_shuffle_pd(a, b, i) \\\n" |
| 19144 | " (__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \\\n" |
| 19145 | " (int)(i))\n" |
| 19146 | "\n" |
| 19147 | "/// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit\n" |
| 19148 | "/// floating-point vector of [4 x float].\n" |
| 19149 | "///\n" |
| 19150 | "/// \\headerfile <x86intrin.h>\n" |
| 19151 | "///\n" |
| 19152 | "/// This intrinsic has no corresponding instruction.\n" |
| 19153 | "///\n" |
| 19154 | "/// \\param __a\n" |
| 19155 | "/// A 128-bit floating-point vector of [2 x double].\n" |
| 19156 | "/// \\returns A 128-bit floating-point vector of [4 x float] containing the same\n" |
| 19157 | "/// bitwise pattern as the parameter.\n" |
| 19158 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 19159 | "_mm_castpd_ps(__m128d __a)\n" |
| 19160 | "{\n" |
| 19161 | " return (__m128)__a;\n" |
| 19162 | "}\n" |
| 19163 | "\n" |
| 19164 | "/// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit\n" |
| 19165 | "/// integer vector.\n" |
| 19166 | "///\n" |
| 19167 | "/// \\headerfile <x86intrin.h>\n" |
| 19168 | "///\n" |
| 19169 | "/// This intrinsic has no corresponding instruction.\n" |
| 19170 | "///\n" |
| 19171 | "/// \\param __a\n" |
| 19172 | "/// A 128-bit floating-point vector of [2 x double].\n" |
| 19173 | "/// \\returns A 128-bit integer vector containing the same bitwise pattern as the\n" |
| 19174 | "/// parameter.\n" |
| 19175 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 19176 | "_mm_castpd_si128(__m128d __a)\n" |
| 19177 | "{\n" |
| 19178 | " return (__m128i)__a;\n" |
| 19179 | "}\n" |
| 19180 | "\n" |
| 19181 | "/// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit\n" |
| 19182 | "/// floating-point vector of [2 x double].\n" |
| 19183 | "///\n" |
| 19184 | "/// \\headerfile <x86intrin.h>\n" |
| 19185 | "///\n" |
| 19186 | "/// This intrinsic has no corresponding instruction.\n" |
| 19187 | "///\n" |
| 19188 | "/// \\param __a\n" |
| 19189 | "/// A 128-bit floating-point vector of [4 x float].\n" |
| 19190 | "/// \\returns A 128-bit floating-point vector of [2 x double] containing the same\n" |
| 19191 | "/// bitwise pattern as the parameter.\n" |
| 19192 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 19193 | "_mm_castps_pd(__m128 __a)\n" |
| 19194 | "{\n" |
| 19195 | " return (__m128d)__a;\n" |
| 19196 | "}\n" |
| 19197 | "\n" |
| 19198 | "/// Casts a 128-bit floating-point vector of [4 x float] into a 128-bit\n" |
| 19199 | "/// integer vector.\n" |
| 19200 | "///\n" |
| 19201 | "/// \\headerfile <x86intrin.h>\n" |
| 19202 | "///\n" |
| 19203 | "/// This intrinsic has no corresponding instruction.\n" |
| 19204 | "///\n" |
| 19205 | "/// \\param __a\n" |
| 19206 | "/// A 128-bit floating-point vector of [4 x float].\n" |
| 19207 | "/// \\returns A 128-bit integer vector containing the same bitwise pattern as the\n" |
| 19208 | "/// parameter.\n" |
| 19209 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 19210 | "_mm_castps_si128(__m128 __a)\n" |
| 19211 | "{\n" |
| 19212 | " return (__m128i)__a;\n" |
| 19213 | "}\n" |
| 19214 | "\n" |
| 19215 | "/// Casts a 128-bit integer vector into a 128-bit floating-point vector\n" |
| 19216 | "/// of [4 x float].\n" |
| 19217 | "///\n" |
| 19218 | "/// \\headerfile <x86intrin.h>\n" |
| 19219 | "///\n" |
| 19220 | "/// This intrinsic has no corresponding instruction.\n" |
| 19221 | "///\n" |
| 19222 | "/// \\param __a\n" |
| 19223 | "/// A 128-bit integer vector.\n" |
| 19224 | "/// \\returns A 128-bit floating-point vector of [4 x float] containing the same\n" |
| 19225 | "/// bitwise pattern as the parameter.\n" |
| 19226 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 19227 | "_mm_castsi128_ps(__m128i __a)\n" |
| 19228 | "{\n" |
| 19229 | " return (__m128)__a;\n" |
| 19230 | "}\n" |
| 19231 | "\n" |
| 19232 | "/// Casts a 128-bit integer vector into a 128-bit floating-point vector\n" |
| 19233 | "/// of [2 x double].\n" |
| 19234 | "///\n" |
| 19235 | "/// \\headerfile <x86intrin.h>\n" |
| 19236 | "///\n" |
| 19237 | "/// This intrinsic has no corresponding instruction.\n" |
| 19238 | "///\n" |
| 19239 | "/// \\param __a\n" |
| 19240 | "/// A 128-bit integer vector.\n" |
| 19241 | "/// \\returns A 128-bit floating-point vector of [2 x double] containing the same\n" |
| 19242 | "/// bitwise pattern as the parameter.\n" |
| 19243 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 19244 | "_mm_castsi128_pd(__m128i __a)\n" |
| 19245 | "{\n" |
| 19246 | " return (__m128d)__a;\n" |
| 19247 | "}\n" |
| 19248 | "\n" |
| 19249 | "#if defined(__cplusplus)\n" |
| 19250 | "extern \"C\" {\n" |
| 19251 | "#endif\n" |
| 19252 | "\n" |
| 19253 | "/// Indicates that a spin loop is being executed for the purposes of\n" |
| 19254 | "/// optimizing power consumption during the loop.\n" |
| 19255 | "///\n" |
| 19256 | "/// \\headerfile <x86intrin.h>\n" |
| 19257 | "///\n" |
| 19258 | "/// This intrinsic corresponds to the <c> PAUSE </c> instruction.\n" |
| 19259 | "///\n" |
| 19260 | "void _mm_pause(void);\n" |
| 19261 | "\n" |
| 19262 | "#if defined(__cplusplus)\n" |
| 19263 | "} // extern \"C\"\n" |
| 19264 | "#endif\n" |
| 19265 | "#undef __DEFAULT_FN_ATTRS\n" |
| 19266 | "#undef __DEFAULT_FN_ATTRS_MMX\n" |
| 19267 | "\n" |
| 19268 | "#define _MM_SHUFFLE2(x, y) (((x) << 1) | (y))\n" |
| 19269 | "\n" |
| 19270 | "#define _MM_DENORMALS_ZERO_ON (0x0040)\n" |
| 19271 | "#define _MM_DENORMALS_ZERO_OFF (0x0000)\n" |
| 19272 | "\n" |
| 19273 | "#define _MM_DENORMALS_ZERO_MASK (0x0040)\n" |
| 19274 | "\n" |
| 19275 | "#define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)\n" |
| 19276 | "#define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x)))\n" |
| 19277 | "\n" |
| 19278 | "#endif /* __EMMINTRIN_H */\n" |
| 19279 | "" } , |
| 19280 | { "/builtins/f16cintrin.h" , "/*===---- f16cintrin.h - F16C intrinsics -----------------------------------===\n" |
| 19281 | " *\n" |
| 19282 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 19283 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 19284 | " * in the Software without restriction, including without limitation the rights\n" |
| 19285 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 19286 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 19287 | " * furnished to do so, subject to the following conditions:\n" |
| 19288 | " *\n" |
| 19289 | " * The above copyright notice and this permission notice shall be included in\n" |
| 19290 | " * all copies or substantial portions of the Software.\n" |
| 19291 | " *\n" |
| 19292 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 19293 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 19294 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 19295 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 19296 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 19297 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 19298 | " * THE SOFTWARE.\n" |
| 19299 | " *\n" |
| 19300 | " *===-----------------------------------------------------------------------===\n" |
| 19301 | " */\n" |
| 19302 | "\n" |
| 19303 | "#if !defined __IMMINTRIN_H\n" |
| 19304 | "#error \"Never use <f16cintrin.h> directly; include <immintrin.h> instead.\"\n" |
| 19305 | "#endif\n" |
| 19306 | "\n" |
| 19307 | "#ifndef __F16CINTRIN_H\n" |
| 19308 | "#define __F16CINTRIN_H\n" |
| 19309 | "\n" |
| 19310 | "/* Define the default attributes for the functions in this file. */\n" |
| 19311 | "#define __DEFAULT_FN_ATTRS128 \\\n" |
| 19312 | " __attribute__((__always_inline__, __nodebug__, __target__(\"f16c\"), __min_vector_width__(128)))\n" |
| 19313 | "#define __DEFAULT_FN_ATTRS256 \\\n" |
| 19314 | " __attribute__((__always_inline__, __nodebug__, __target__(\"f16c\"), __min_vector_width__(256)))\n" |
| 19315 | "\n" |
| 19316 | "/* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h,\n" |
| 19317 | " * but that's because icc can emulate these without f16c using a library call.\n" |
| 19318 | " * Since we don't do that let's leave these in f16cintrin.h.\n" |
| 19319 | " */\n" |
| 19320 | "\n" |
| 19321 | "/// Converts a 16-bit half-precision float value into a 32-bit float\n" |
| 19322 | "/// value.\n" |
| 19323 | "///\n" |
| 19324 | "/// \\headerfile <x86intrin.h>\n" |
| 19325 | "///\n" |
| 19326 | "/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.\n" |
| 19327 | "///\n" |
| 19328 | "/// \\param __a\n" |
| 19329 | "/// A 16-bit half-precision float value.\n" |
| 19330 | "/// \\returns The converted 32-bit float value.\n" |
| 19331 | "static __inline float __DEFAULT_FN_ATTRS128\n" |
| 19332 | "_cvtsh_ss(unsigned short __a)\n" |
| 19333 | "{\n" |
| 19334 | " __v8hi v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};\n" |
| 19335 | " __v4sf r = __builtin_ia32_vcvtph2ps(v);\n" |
| 19336 | " return r[0];\n" |
| 19337 | "}\n" |
| 19338 | "\n" |
| 19339 | "/// Converts a 32-bit single-precision float value to a 16-bit\n" |
| 19340 | "/// half-precision float value.\n" |
| 19341 | "///\n" |
| 19342 | "/// \\headerfile <x86intrin.h>\n" |
| 19343 | "///\n" |
| 19344 | "/// \\code\n" |
| 19345 | "/// unsigned short _cvtss_sh(float a, const int imm);\n" |
| 19346 | "/// \\endcode\n" |
| 19347 | "///\n" |
| 19348 | "/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.\n" |
| 19349 | "///\n" |
| 19350 | "/// \\param a\n" |
| 19351 | "/// A 32-bit single-precision float value to be converted to a 16-bit\n" |
| 19352 | "/// half-precision float value.\n" |
| 19353 | "/// \\param imm\n" |
| 19354 | "/// An immediate value controlling rounding using bits [2:0]: \\n\n" |
| 19355 | "/// 000: Nearest \\n\n" |
| 19356 | "/// 001: Down \\n\n" |
| 19357 | "/// 010: Up \\n\n" |
| 19358 | "/// 011: Truncate \\n\n" |
| 19359 | "/// 1XX: Use MXCSR.RC for rounding\n" |
| 19360 | "/// \\returns The converted 16-bit half-precision float value.\n" |
| 19361 | "#define _cvtss_sh(a, imm) \\\n" |
| 19362 | " (unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph((__v4sf){a, 0, 0, 0}, \\\n" |
| 19363 | " (imm)))[0])\n" |
| 19364 | "\n" |
| 19365 | "/// Converts a 128-bit vector containing 32-bit float values into a\n" |
| 19366 | "/// 128-bit vector containing 16-bit half-precision float values.\n" |
| 19367 | "///\n" |
| 19368 | "/// \\headerfile <x86intrin.h>\n" |
| 19369 | "///\n" |
| 19370 | "/// \\code\n" |
| 19371 | "/// __m128i _mm_cvtps_ph(__m128 a, const int imm);\n" |
| 19372 | "/// \\endcode\n" |
| 19373 | "///\n" |
| 19374 | "/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.\n" |
| 19375 | "///\n" |
| 19376 | "/// \\param a\n" |
| 19377 | "/// A 128-bit vector containing 32-bit float values.\n" |
| 19378 | "/// \\param imm\n" |
| 19379 | "/// An immediate value controlling rounding using bits [2:0]: \\n\n" |
| 19380 | "/// 000: Nearest \\n\n" |
| 19381 | "/// 001: Down \\n\n" |
| 19382 | "/// 010: Up \\n\n" |
| 19383 | "/// 011: Truncate \\n\n" |
| 19384 | "/// 1XX: Use MXCSR.RC for rounding\n" |
| 19385 | "/// \\returns A 128-bit vector containing converted 16-bit half-precision float\n" |
| 19386 | "/// values. The lower 64 bits are used to store the converted 16-bit\n" |
| 19387 | "/// half-precision floating-point values.\n" |
| 19388 | "#define _mm_cvtps_ph(a, imm) \\\n" |
| 19389 | " (__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm))\n" |
| 19390 | "\n" |
| 19391 | "/// Converts a 128-bit vector containing 16-bit half-precision float\n" |
| 19392 | "/// values into a 128-bit vector containing 32-bit float values.\n" |
| 19393 | "///\n" |
| 19394 | "/// \\headerfile <x86intrin.h>\n" |
| 19395 | "///\n" |
| 19396 | "/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.\n" |
| 19397 | "///\n" |
| 19398 | "/// \\param __a\n" |
| 19399 | "/// A 128-bit vector containing 16-bit half-precision float values. The lower\n" |
| 19400 | "/// 64 bits are used in the conversion.\n" |
| 19401 | "/// \\returns A 128-bit vector of [4 x float] containing converted float values.\n" |
| 19402 | "static __inline __m128 __DEFAULT_FN_ATTRS128\n" |
| 19403 | "_mm_cvtph_ps(__m128i __a)\n" |
| 19404 | "{\n" |
| 19405 | " return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);\n" |
| 19406 | "}\n" |
| 19407 | "\n" |
| 19408 | "/// Converts a 256-bit vector of [8 x float] into a 128-bit vector\n" |
| 19409 | "/// containing 16-bit half-precision float values.\n" |
| 19410 | "///\n" |
| 19411 | "/// \\headerfile <x86intrin.h>\n" |
| 19412 | "///\n" |
| 19413 | "/// \\code\n" |
| 19414 | "/// __m128i _mm256_cvtps_ph(__m256 a, const int imm);\n" |
| 19415 | "/// \\endcode\n" |
| 19416 | "///\n" |
| 19417 | "/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.\n" |
| 19418 | "///\n" |
| 19419 | "/// \\param a\n" |
| 19420 | "/// A 256-bit vector containing 32-bit single-precision float values to be\n" |
| 19421 | "/// converted to 16-bit half-precision float values.\n" |
| 19422 | "/// \\param imm\n" |
| 19423 | "/// An immediate value controlling rounding using bits [2:0]: \\n\n" |
| 19424 | "/// 000: Nearest \\n\n" |
| 19425 | "/// 001: Down \\n\n" |
| 19426 | "/// 010: Up \\n\n" |
| 19427 | "/// 011: Truncate \\n\n" |
| 19428 | "/// 1XX: Use MXCSR.RC for rounding\n" |
| 19429 | "/// \\returns A 128-bit vector containing the converted 16-bit half-precision\n" |
| 19430 | "/// float values.\n" |
| 19431 | "#define _mm256_cvtps_ph(a, imm) \\\n" |
| 19432 | " (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm))\n" |
| 19433 | "\n" |
| 19434 | "/// Converts a 128-bit vector containing 16-bit half-precision float\n" |
| 19435 | "/// values into a 256-bit vector of [8 x float].\n" |
| 19436 | "///\n" |
| 19437 | "/// \\headerfile <x86intrin.h>\n" |
| 19438 | "///\n" |
| 19439 | "/// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.\n" |
| 19440 | "///\n" |
| 19441 | "/// \\param __a\n" |
| 19442 | "/// A 128-bit vector containing 16-bit half-precision float values to be\n" |
| 19443 | "/// converted to 32-bit single-precision float values.\n" |
| 19444 | "/// \\returns A vector of [8 x float] containing the converted 32-bit\n" |
| 19445 | "/// single-precision float values.\n" |
| 19446 | "static __inline __m256 __DEFAULT_FN_ATTRS256\n" |
| 19447 | "_mm256_cvtph_ps(__m128i __a)\n" |
| 19448 | "{\n" |
| 19449 | " return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);\n" |
| 19450 | "}\n" |
| 19451 | "\n" |
| 19452 | "#undef __DEFAULT_FN_ATTRS128\n" |
| 19453 | "#undef __DEFAULT_FN_ATTRS256\n" |
| 19454 | "\n" |
| 19455 | "#endif /* __F16CINTRIN_H */\n" |
| 19456 | "" } , |
| 19457 | { "/builtins/float.h" , "/*===---- float.h - Characteristics of floating point types ----------------===\n" |
| 19458 | " *\n" |
| 19459 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 19460 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 19461 | " * in the Software without restriction, including without limitation the rights\n" |
| 19462 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 19463 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 19464 | " * furnished to do so, subject to the following conditions:\n" |
| 19465 | " *\n" |
| 19466 | " * The above copyright notice and this permission notice shall be included in\n" |
| 19467 | " * all copies or substantial portions of the Software.\n" |
| 19468 | " *\n" |
| 19469 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 19470 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 19471 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 19472 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 19473 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 19474 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 19475 | " * THE SOFTWARE.\n" |
| 19476 | " *\n" |
| 19477 | " *===-----------------------------------------------------------------------===\n" |
| 19478 | " */\n" |
| 19479 | "\n" |
| 19480 | "#ifndef __FLOAT_H\n" |
| 19481 | "#define __FLOAT_H\n" |
| 19482 | "\n" |
| 19483 | "/* If we're on MinGW, fall back to the system's float.h, which might have\n" |
| 19484 | " * additional definitions provided for Windows.\n" |
| 19485 | " * For more details see http://msdn.microsoft.com/en-us/library/y0ybw9fy.aspx\n" |
| 19486 | " *\n" |
| 19487 | " * Also fall back on Darwin to allow additional definitions and\n" |
| 19488 | " * implementation-defined values.\n" |
| 19489 | " */\n" |
| 19490 | "#if (defined(__APPLE__) || (defined(__MINGW32__) || defined(_MSC_VER))) && \\\n" |
| 19491 | " __STDC_HOSTED__ && __has_include_next(<float.h>)\n" |
| 19492 | "\n" |
| 19493 | "/* Prior to Apple's 10.7 SDK, float.h SDK header used to apply an extra level\n" |
| 19494 | " * of #include_next<float.h> to keep Metrowerks compilers happy. Avoid this\n" |
| 19495 | " * extra indirection.\n" |
| 19496 | " */\n" |
| 19497 | "#ifdef __APPLE__\n" |
| 19498 | "#define _FLOAT_H_\n" |
| 19499 | "#endif\n" |
| 19500 | "\n" |
| 19501 | "# include_next <float.h>\n" |
| 19502 | "\n" |
| 19503 | "/* Undefine anything that we'll be redefining below. */\n" |
| 19504 | "# undef FLT_EVAL_METHOD\n" |
| 19505 | "# undef FLT_ROUNDS\n" |
| 19506 | "# undef FLT_RADIX\n" |
| 19507 | "# undef FLT_MANT_DIG\n" |
| 19508 | "# undef DBL_MANT_DIG\n" |
| 19509 | "# undef LDBL_MANT_DIG\n" |
| 19510 | "# if __STDC_VERSION__ >= 199901L || !defined(__STRICT_ANSI__)\n" |
| 19511 | "# undef DECIMAL_DIG\n" |
| 19512 | "# endif\n" |
| 19513 | "# undef FLT_DIG\n" |
| 19514 | "# undef DBL_DIG\n" |
| 19515 | "# undef LDBL_DIG\n" |
| 19516 | "# undef FLT_MIN_EXP\n" |
| 19517 | "# undef DBL_MIN_EXP\n" |
| 19518 | "# undef LDBL_MIN_EXP\n" |
| 19519 | "# undef FLT_MIN_10_EXP\n" |
| 19520 | "# undef DBL_MIN_10_EXP\n" |
| 19521 | "# undef LDBL_MIN_10_EXP\n" |
| 19522 | "# undef FLT_MAX_EXP\n" |
| 19523 | "# undef DBL_MAX_EXP\n" |
| 19524 | "# undef LDBL_MAX_EXP\n" |
| 19525 | "# undef FLT_MAX_10_EXP\n" |
| 19526 | "# undef DBL_MAX_10_EXP\n" |
| 19527 | "# undef LDBL_MAX_10_EXP\n" |
| 19528 | "# undef FLT_MAX\n" |
| 19529 | "# undef DBL_MAX\n" |
| 19530 | "# undef LDBL_MAX\n" |
| 19531 | "# undef FLT_EPSILON\n" |
| 19532 | "# undef DBL_EPSILON\n" |
| 19533 | "# undef LDBL_EPSILON\n" |
| 19534 | "# undef FLT_MIN\n" |
| 19535 | "# undef DBL_MIN\n" |
| 19536 | "# undef LDBL_MIN\n" |
| 19537 | "# if __STDC_VERSION__ >= 201112L || !defined(__STRICT_ANSI__)\n" |
| 19538 | "# undef FLT_TRUE_MIN\n" |
| 19539 | "# undef DBL_TRUE_MIN\n" |
| 19540 | "# undef LDBL_TRUE_MIN\n" |
| 19541 | "# undef FLT_DECIMAL_DIG\n" |
| 19542 | "# undef DBL_DECIMAL_DIG\n" |
| 19543 | "# undef LDBL_DECIMAL_DIG\n" |
| 19544 | "# endif\n" |
| 19545 | "#endif\n" |
| 19546 | "\n" |
| 19547 | "/* Characteristics of floating point types, C99 5.2.4.2.2 */\n" |
| 19548 | "\n" |
| 19549 | "#define FLT_EVAL_METHOD __FLT_EVAL_METHOD__\n" |
| 19550 | "#define FLT_ROUNDS (__builtin_flt_rounds())\n" |
| 19551 | "#define FLT_RADIX __FLT_RADIX__\n" |
| 19552 | "\n" |
| 19553 | "#define FLT_MANT_DIG __FLT_MANT_DIG__\n" |
| 19554 | "#define DBL_MANT_DIG __DBL_MANT_DIG__\n" |
| 19555 | "#define LDBL_MANT_DIG __LDBL_MANT_DIG__\n" |
| 19556 | "\n" |
| 19557 | "#if __STDC_VERSION__ >= 199901L || !defined(__STRICT_ANSI__)\n" |
| 19558 | "# define DECIMAL_DIG __DECIMAL_DIG__\n" |
| 19559 | "#endif\n" |
| 19560 | "\n" |
| 19561 | "#define FLT_DIG __FLT_DIG__\n" |
| 19562 | "#define DBL_DIG __DBL_DIG__\n" |
| 19563 | "#define LDBL_DIG __LDBL_DIG__\n" |
| 19564 | "\n" |
| 19565 | "#define FLT_MIN_EXP __FLT_MIN_EXP__\n" |
| 19566 | "#define DBL_MIN_EXP __DBL_MIN_EXP__\n" |
| 19567 | "#define LDBL_MIN_EXP __LDBL_MIN_EXP__\n" |
| 19568 | "\n" |
| 19569 | "#define FLT_MIN_10_EXP __FLT_MIN_10_EXP__\n" |
| 19570 | "#define DBL_MIN_10_EXP __DBL_MIN_10_EXP__\n" |
| 19571 | "#define LDBL_MIN_10_EXP __LDBL_MIN_10_EXP__\n" |
| 19572 | "\n" |
| 19573 | "#define FLT_MAX_EXP __FLT_MAX_EXP__\n" |
| 19574 | "#define DBL_MAX_EXP __DBL_MAX_EXP__\n" |
| 19575 | "#define LDBL_MAX_EXP __LDBL_MAX_EXP__\n" |
| 19576 | "\n" |
| 19577 | "#define FLT_MAX_10_EXP __FLT_MAX_10_EXP__\n" |
| 19578 | "#define DBL_MAX_10_EXP __DBL_MAX_10_EXP__\n" |
| 19579 | "#define LDBL_MAX_10_EXP __LDBL_MAX_10_EXP__\n" |
| 19580 | "\n" |
| 19581 | "#define FLT_MAX __FLT_MAX__\n" |
| 19582 | "#define DBL_MAX __DBL_MAX__\n" |
| 19583 | "#define LDBL_MAX __LDBL_MAX__\n" |
| 19584 | "\n" |
| 19585 | "#define FLT_EPSILON __FLT_EPSILON__\n" |
| 19586 | "#define DBL_EPSILON __DBL_EPSILON__\n" |
| 19587 | "#define LDBL_EPSILON __LDBL_EPSILON__\n" |
| 19588 | "\n" |
| 19589 | "#define FLT_MIN __FLT_MIN__\n" |
| 19590 | "#define DBL_MIN __DBL_MIN__\n" |
| 19591 | "#define LDBL_MIN __LDBL_MIN__\n" |
| 19592 | "\n" |
| 19593 | "#if __STDC_VERSION__ >= 201112L || !defined(__STRICT_ANSI__)\n" |
| 19594 | "# define FLT_TRUE_MIN __FLT_DENORM_MIN__\n" |
| 19595 | "# define DBL_TRUE_MIN __DBL_DENORM_MIN__\n" |
| 19596 | "# define LDBL_TRUE_MIN __LDBL_DENORM_MIN__\n" |
| 19597 | "# define FLT_DECIMAL_DIG __FLT_DECIMAL_DIG__\n" |
| 19598 | "# define DBL_DECIMAL_DIG __DBL_DECIMAL_DIG__\n" |
| 19599 | "# define LDBL_DECIMAL_DIG __LDBL_DECIMAL_DIG__\n" |
| 19600 | "#endif\n" |
| 19601 | "\n" |
| 19602 | "#ifdef __STDC_WANT_IEC_60559_TYPES_EXT__\n" |
| 19603 | "# define FLT16_MANT_DIG __FLT16_MANT_DIG__\n" |
| 19604 | "# define FLT16_DECIMAL_DIG __FLT16_DECIMAL_DIG__\n" |
| 19605 | "# define FLT16_DIG __FLT16_DIG__\n" |
| 19606 | "# define FLT16_MIN_EXP __FLT16_MIN_EXP__\n" |
| 19607 | "# define FLT16_MIN_10_EXP __FLT16_MIN_10_EXP__\n" |
| 19608 | "# define FLT16_MAX_EXP __FLT16_MAX_EXP__\n" |
| 19609 | "# define FLT16_MAX_10_EXP __FLT16_MAX_10_EXP__\n" |
| 19610 | "# define FLT16_MAX __FLT16_MAX__\n" |
| 19611 | "# define FLT16_EPSILON __FLT16_EPSILON__\n" |
| 19612 | "# define FLT16_MIN __FLT16_MIN__\n" |
| 19613 | "# define FLT16_TRUE_MIN __FLT16_TRUE_MIN__\n" |
| 19614 | "#endif /* __STDC_WANT_IEC_60559_TYPES_EXT__ */\n" |
| 19615 | "\n" |
| 19616 | "#endif /* __FLOAT_H */\n" |
| 19617 | "" } , |
| 19618 | { "/builtins/fma4intrin.h" , "/*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------===\n" |
| 19619 | " *\n" |
| 19620 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 19621 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 19622 | " * in the Software without restriction, including without limitation the rights\n" |
| 19623 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 19624 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 19625 | " * furnished to do so, subject to the following conditions:\n" |
| 19626 | " *\n" |
| 19627 | " * The above copyright notice and this permission notice shall be included in\n" |
| 19628 | " * all copies or substantial portions of the Software.\n" |
| 19629 | " *\n" |
| 19630 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 19631 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 19632 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 19633 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 19634 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 19635 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 19636 | " * THE SOFTWARE.\n" |
| 19637 | " *\n" |
| 19638 | " *===-----------------------------------------------------------------------===\n" |
| 19639 | " */\n" |
| 19640 | "\n" |
| 19641 | "#ifndef __X86INTRIN_H\n" |
| 19642 | "#error \"Never use <fma4intrin.h> directly; include <x86intrin.h> instead.\"\n" |
| 19643 | "#endif\n" |
| 19644 | "\n" |
| 19645 | "#ifndef __FMA4INTRIN_H\n" |
| 19646 | "#define __FMA4INTRIN_H\n" |
| 19647 | "\n" |
| 19648 | "#include <pmmintrin.h>\n" |
| 19649 | "\n" |
| 19650 | "/* Define the default attributes for the functions in this file. */\n" |
| 19651 | "#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__(\"fma4\"), __min_vector_width__(128)))\n" |
| 19652 | "#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__(\"fma4\"), __min_vector_width__(256)))\n" |
| 19653 | "\n" |
| 19654 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
| 19655 | "_mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)\n" |
| 19656 | "{\n" |
| 19657 | " return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n" |
| 19658 | "}\n" |
| 19659 | "\n" |
| 19660 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
| 19661 | "_mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)\n" |
| 19662 | "{\n" |
| 19663 | " return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);\n" |
| 19664 | "}\n" |
| 19665 | "\n" |
| 19666 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
| 19667 | "_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)\n" |
| 19668 | "{\n" |
| 19669 | " return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n" |
| 19670 | "}\n" |
| 19671 | "\n" |
| 19672 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
| 19673 | "_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)\n" |
| 19674 | "{\n" |
| 19675 | " return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);\n" |
| 19676 | "}\n" |
| 19677 | "\n" |
| 19678 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
| 19679 | "_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)\n" |
| 19680 | "{\n" |
| 19681 | " return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n" |
| 19682 | "}\n" |
| 19683 | "\n" |
| 19684 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
| 19685 | "_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)\n" |
| 19686 | "{\n" |
| 19687 | " return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);\n" |
| 19688 | "}\n" |
| 19689 | "\n" |
| 19690 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
| 19691 | "_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)\n" |
| 19692 | "{\n" |
| 19693 | " return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n" |
| 19694 | "}\n" |
| 19695 | "\n" |
| 19696 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
| 19697 | "_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)\n" |
| 19698 | "{\n" |
| 19699 | " return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C);\n" |
| 19700 | "}\n" |
| 19701 | "\n" |
| 19702 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
| 19703 | "_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)\n" |
| 19704 | "{\n" |
| 19705 | " return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n" |
| 19706 | "}\n" |
| 19707 | "\n" |
| 19708 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
| 19709 | "_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)\n" |
| 19710 | "{\n" |
| 19711 | " return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);\n" |
| 19712 | "}\n" |
| 19713 | "\n" |
| 19714 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
| 19715 | "_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)\n" |
| 19716 | "{\n" |
| 19717 | " return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n" |
| 19718 | "}\n" |
| 19719 | "\n" |
| 19720 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
| 19721 | "_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)\n" |
| 19722 | "{\n" |
| 19723 | " return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);\n" |
| 19724 | "}\n" |
| 19725 | "\n" |
| 19726 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
| 19727 | "_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)\n" |
| 19728 | "{\n" |
| 19729 | " return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n" |
| 19730 | "}\n" |
| 19731 | "\n" |
| 19732 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
| 19733 | "_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)\n" |
| 19734 | "{\n" |
| 19735 | " return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);\n" |
| 19736 | "}\n" |
| 19737 | "\n" |
| 19738 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
| 19739 | "_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)\n" |
| 19740 | "{\n" |
| 19741 | " return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n" |
| 19742 | "}\n" |
| 19743 | "\n" |
| 19744 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
| 19745 | "_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)\n" |
| 19746 | "{\n" |
| 19747 | " return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);\n" |
| 19748 | "}\n" |
| 19749 | "\n" |
| 19750 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
| 19751 | "_mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C)\n" |
| 19752 | "{\n" |
| 19753 | " return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n" |
| 19754 | "}\n" |
| 19755 | "\n" |
| 19756 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
| 19757 | "_mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C)\n" |
| 19758 | "{\n" |
| 19759 | " return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);\n" |
| 19760 | "}\n" |
| 19761 | "\n" |
| 19762 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
| 19763 | "_mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C)\n" |
| 19764 | "{\n" |
| 19765 | " return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n" |
| 19766 | "}\n" |
| 19767 | "\n" |
| 19768 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
| 19769 | "_mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C)\n" |
| 19770 | "{\n" |
| 19771 | " return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);\n" |
| 19772 | "}\n" |
| 19773 | "\n" |
| 19774 | "static __inline__ __m256 __DEFAULT_FN_ATTRS256\n" |
| 19775 | "_mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C)\n" |
| 19776 | "{\n" |
| 19777 | " return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);\n" |
| 19778 | "}\n" |
| 19779 | "\n" |
| 19780 | "static __inline__ __m256d __DEFAULT_FN_ATTRS256\n" |
| 19781 | "_mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C)\n" |
| 19782 | "{\n" |
| 19783 | " return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);\n" |
| 19784 | "}\n" |
| 19785 | "\n" |
| 19786 | "static __inline__ __m256 __DEFAULT_FN_ATTRS256\n" |
| 19787 | "_mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)\n" |
| 19788 | "{\n" |
| 19789 | " return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);\n" |
| 19790 | "}\n" |
| 19791 | "\n" |
| 19792 | "static __inline__ __m256d __DEFAULT_FN_ATTRS256\n" |
| 19793 | "_mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C)\n" |
| 19794 | "{\n" |
| 19795 | " return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);\n" |
| 19796 | "}\n" |
| 19797 | "\n" |
| 19798 | "static __inline__ __m256 __DEFAULT_FN_ATTRS256\n" |
| 19799 | "_mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C)\n" |
| 19800 | "{\n" |
| 19801 | " return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);\n" |
| 19802 | "}\n" |
| 19803 | "\n" |
| 19804 | "static __inline__ __m256d __DEFAULT_FN_ATTRS256\n" |
| 19805 | "_mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C)\n" |
| 19806 | "{\n" |
| 19807 | " return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);\n" |
| 19808 | "}\n" |
| 19809 | "\n" |
| 19810 | "static __inline__ __m256 __DEFAULT_FN_ATTRS256\n" |
| 19811 | "_mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C)\n" |
| 19812 | "{\n" |
| 19813 | " return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);\n" |
| 19814 | "}\n" |
| 19815 | "\n" |
| 19816 | "static __inline__ __m256d __DEFAULT_FN_ATTRS256\n" |
| 19817 | "_mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C)\n" |
| 19818 | "{\n" |
| 19819 | " return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);\n" |
| 19820 | "}\n" |
| 19821 | "\n" |
| 19822 | "static __inline__ __m256 __DEFAULT_FN_ATTRS256\n" |
| 19823 | "_mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C)\n" |
| 19824 | "{\n" |
| 19825 | " return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);\n" |
| 19826 | "}\n" |
| 19827 | "\n" |
| 19828 | "static __inline__ __m256d __DEFAULT_FN_ATTRS256\n" |
| 19829 | "_mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C)\n" |
| 19830 | "{\n" |
| 19831 | " return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);\n" |
| 19832 | "}\n" |
| 19833 | "\n" |
| 19834 | "static __inline__ __m256 __DEFAULT_FN_ATTRS256\n" |
| 19835 | "_mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C)\n" |
| 19836 | "{\n" |
| 19837 | " return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);\n" |
| 19838 | "}\n" |
| 19839 | "\n" |
| 19840 | "static __inline__ __m256d __DEFAULT_FN_ATTRS256\n" |
| 19841 | "_mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C)\n" |
| 19842 | "{\n" |
| 19843 | " return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);\n" |
| 19844 | "}\n" |
| 19845 | "\n" |
| 19846 | "#undef __DEFAULT_FN_ATTRS128\n" |
| 19847 | "#undef __DEFAULT_FN_ATTRS256\n" |
| 19848 | "\n" |
| 19849 | "#endif /* __FMA4INTRIN_H */\n" |
| 19850 | "" } , |
| 19851 | { "/builtins/fmaintrin.h" , "/*===---- fmaintrin.h - FMA intrinsics -------------------------------------===\n" |
| 19852 | " *\n" |
| 19853 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 19854 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 19855 | " * in the Software without restriction, including without limitation the rights\n" |
| 19856 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 19857 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 19858 | " * furnished to do so, subject to the following conditions:\n" |
| 19859 | " *\n" |
| 19860 | " * The above copyright notice and this permission notice shall be included in\n" |
| 19861 | " * all copies or substantial portions of the Software.\n" |
| 19862 | " *\n" |
| 19863 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 19864 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 19865 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 19866 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 19867 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 19868 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 19869 | " * THE SOFTWARE.\n" |
| 19870 | " *\n" |
| 19871 | " *===-----------------------------------------------------------------------===\n" |
| 19872 | " */\n" |
| 19873 | "\n" |
| 19874 | "#ifndef __IMMINTRIN_H\n" |
| 19875 | "#error \"Never use <fmaintrin.h> directly; include <immintrin.h> instead.\"\n" |
| 19876 | "#endif\n" |
| 19877 | "\n" |
| 19878 | "#ifndef __FMAINTRIN_H\n" |
| 19879 | "#define __FMAINTRIN_H\n" |
| 19880 | "\n" |
| 19881 | "/* Define the default attributes for the functions in this file. */\n" |
| 19882 | "#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__(\"fma\"), __min_vector_width__(128)))\n" |
| 19883 | "#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__(\"fma\"), __min_vector_width__(256)))\n" |
| 19884 | "\n" |
| 19885 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
| 19886 | "_mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)\n" |
| 19887 | "{\n" |
| 19888 | " return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n" |
| 19889 | "}\n" |
| 19890 | "\n" |
| 19891 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
| 19892 | "_mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)\n" |
| 19893 | "{\n" |
| 19894 | " return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);\n" |
| 19895 | "}\n" |
| 19896 | "\n" |
| 19897 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
| 19898 | "_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)\n" |
| 19899 | "{\n" |
| 19900 | " return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n" |
| 19901 | "}\n" |
| 19902 | "\n" |
| 19903 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
| 19904 | "_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)\n" |
| 19905 | "{\n" |
| 19906 | " return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);\n" |
| 19907 | "}\n" |
| 19908 | "\n" |
| 19909 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
| 19910 | "_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)\n" |
| 19911 | "{\n" |
| 19912 | " return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n" |
| 19913 | "}\n" |
| 19914 | "\n" |
| 19915 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
| 19916 | "_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)\n" |
| 19917 | "{\n" |
| 19918 | " return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);\n" |
| 19919 | "}\n" |
| 19920 | "\n" |
| 19921 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
| 19922 | "_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)\n" |
| 19923 | "{\n" |
| 19924 | " return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n" |
| 19925 | "}\n" |
| 19926 | "\n" |
| 19927 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
| 19928 | "_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)\n" |
| 19929 | "{\n" |
| 19930 | " return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C);\n" |
| 19931 | "}\n" |
| 19932 | "\n" |
| 19933 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
| 19934 | "_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)\n" |
| 19935 | "{\n" |
| 19936 | " return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n" |
| 19937 | "}\n" |
| 19938 | "\n" |
| 19939 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
| 19940 | "_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)\n" |
| 19941 | "{\n" |
| 19942 | " return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);\n" |
| 19943 | "}\n" |
| 19944 | "\n" |
| 19945 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
| 19946 | "_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)\n" |
| 19947 | "{\n" |
| 19948 | " return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C);\n" |
| 19949 | "}\n" |
| 19950 | "\n" |
| 19951 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
| 19952 | "_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)\n" |
| 19953 | "{\n" |
| 19954 | " return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C);\n" |
| 19955 | "}\n" |
| 19956 | "\n" |
| 19957 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
| 19958 | "_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)\n" |
| 19959 | "{\n" |
| 19960 | " return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n" |
| 19961 | "}\n" |
| 19962 | "\n" |
| 19963 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
| 19964 | "_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)\n" |
| 19965 | "{\n" |
| 19966 | " return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);\n" |
| 19967 | "}\n" |
| 19968 | "\n" |
| 19969 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
| 19970 | "_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)\n" |
| 19971 | "{\n" |
| 19972 | " return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C);\n" |
| 19973 | "}\n" |
| 19974 | "\n" |
| 19975 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
| 19976 | "_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)\n" |
| 19977 | "{\n" |
| 19978 | " return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C);\n" |
| 19979 | "}\n" |
| 19980 | "\n" |
| 19981 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
| 19982 | "_mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C)\n" |
| 19983 | "{\n" |
| 19984 | " return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);\n" |
| 19985 | "}\n" |
| 19986 | "\n" |
| 19987 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
| 19988 | "_mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)\n" |
| 19989 | "{\n" |
| 19990 | " return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);\n" |
| 19991 | "}\n" |
| 19992 | "\n" |
| 19993 | "static __inline__ __m128 __DEFAULT_FN_ATTRS128\n" |
| 19994 | "_mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)\n" |
| 19995 | "{\n" |
| 19996 | " return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);\n" |
| 19997 | "}\n" |
| 19998 | "\n" |
| 19999 | "static __inline__ __m128d __DEFAULT_FN_ATTRS128\n" |
| 20000 | "_mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)\n" |
| 20001 | "{\n" |
| 20002 | " return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);\n" |
| 20003 | "}\n" |
| 20004 | "\n" |
| 20005 | "static __inline__ __m256 __DEFAULT_FN_ATTRS256\n" |
| 20006 | "_mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)\n" |
| 20007 | "{\n" |
| 20008 | " return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);\n" |
| 20009 | "}\n" |
| 20010 | "\n" |
| 20011 | "static __inline__ __m256d __DEFAULT_FN_ATTRS256\n" |
| 20012 | "_mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)\n" |
| 20013 | "{\n" |
| 20014 | " return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);\n" |
| 20015 | "}\n" |
| 20016 | "\n" |
| 20017 | "static __inline__ __m256 __DEFAULT_FN_ATTRS256\n" |
| 20018 | "_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)\n" |
| 20019 | "{\n" |
| 20020 | " return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);\n" |
| 20021 | "}\n" |
| 20022 | "\n" |
| 20023 | "static __inline__ __m256d __DEFAULT_FN_ATTRS256\n" |
| 20024 | "_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)\n" |
| 20025 | "{\n" |
| 20026 | " return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);\n" |
| 20027 | "}\n" |
| 20028 | "\n" |
| 20029 | "static __inline__ __m256 __DEFAULT_FN_ATTRS256\n" |
| 20030 | "_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)\n" |
| 20031 | "{\n" |
| 20032 | " return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);\n" |
| 20033 | "}\n" |
| 20034 | "\n" |
| 20035 | "static __inline__ __m256d __DEFAULT_FN_ATTRS256\n" |
| 20036 | "_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)\n" |
| 20037 | "{\n" |
| 20038 | " return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);\n" |
| 20039 | "}\n" |
| 20040 | "\n" |
| 20041 | "static __inline__ __m256 __DEFAULT_FN_ATTRS256\n" |
| 20042 | "_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)\n" |
| 20043 | "{\n" |
| 20044 | " return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);\n" |
| 20045 | "}\n" |
| 20046 | "\n" |
| 20047 | "static __inline__ __m256d __DEFAULT_FN_ATTRS256\n" |
| 20048 | "_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)\n" |
| 20049 | "{\n" |
| 20050 | " return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);\n" |
| 20051 | "}\n" |
| 20052 | "\n" |
| 20053 | "static __inline__ __m256 __DEFAULT_FN_ATTRS256\n" |
| 20054 | "_mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C)\n" |
| 20055 | "{\n" |
| 20056 | " return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);\n" |
| 20057 | "}\n" |
| 20058 | "\n" |
| 20059 | "static __inline__ __m256d __DEFAULT_FN_ATTRS256\n" |
| 20060 | "_mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)\n" |
| 20061 | "{\n" |
| 20062 | " return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);\n" |
| 20063 | "}\n" |
| 20064 | "\n" |
| 20065 | "static __inline__ __m256 __DEFAULT_FN_ATTRS256\n" |
| 20066 | "_mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)\n" |
| 20067 | "{\n" |
| 20068 | " return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);\n" |
| 20069 | "}\n" |
| 20070 | "\n" |
| 20071 | "static __inline__ __m256d __DEFAULT_FN_ATTRS256\n" |
| 20072 | "_mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)\n" |
| 20073 | "{\n" |
| 20074 | " return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);\n" |
| 20075 | "}\n" |
| 20076 | "\n" |
| 20077 | "#undef __DEFAULT_FN_ATTRS128\n" |
| 20078 | "#undef __DEFAULT_FN_ATTRS256\n" |
| 20079 | "\n" |
| 20080 | "#endif /* __FMAINTRIN_H */\n" |
| 20081 | "" } , |
| 20082 | { "/builtins/fxsrintrin.h" , "/*===---- fxsrintrin.h - FXSR intrinsic ------------------------------------===\n" |
| 20083 | " *\n" |
| 20084 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 20085 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 20086 | " * in the Software without restriction, including without limitation the rights\n" |
| 20087 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 20088 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 20089 | " * furnished to do so, subject to the following conditions:\n" |
| 20090 | " *\n" |
| 20091 | " * The above copyright notice and this permission notice shall be included in\n" |
| 20092 | " * all copies or substantial portions of the Software.\n" |
| 20093 | " *\n" |
| 20094 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 20095 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 20096 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 20097 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 20098 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 20099 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 20100 | " * THE SOFTWARE.\n" |
| 20101 | " *\n" |
| 20102 | " *===-----------------------------------------------------------------------===\n" |
| 20103 | " */\n" |
| 20104 | "\n" |
| 20105 | "#ifndef __IMMINTRIN_H\n" |
| 20106 | "#error \"Never use <fxsrintrin.h> directly; include <immintrin.h> instead.\"\n" |
| 20107 | "#endif\n" |
| 20108 | "\n" |
| 20109 | "#ifndef __FXSRINTRIN_H\n" |
| 20110 | "#define __FXSRINTRIN_H\n" |
| 20111 | "\n" |
| 20112 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"fxsr\")))\n" |
| 20113 | "\n" |
| 20114 | "/// Saves the XMM, MMX, MXCSR and x87 FPU registers into a 512-byte\n" |
| 20115 | "/// memory region pointed to by the input parameter \\a __p.\n" |
| 20116 | "///\n" |
| 20117 | "/// \\headerfile <x86intrin.h>\n" |
| 20118 | "///\n" |
| 20119 | "/// This intrinsic corresponds to the <c> FXSAVE </c> instruction.\n" |
| 20120 | "///\n" |
| 20121 | "/// \\param __p\n" |
| 20122 | "/// A pointer to a 512-byte memory region. The beginning of this memory\n" |
| 20123 | "/// region should be aligned on a 16-byte boundary.\n" |
| 20124 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 20125 | "_fxsave(void *__p)\n" |
| 20126 | "{\n" |
| 20127 | " __builtin_ia32_fxsave(__p);\n" |
| 20128 | "}\n" |
| 20129 | "\n" |
| 20130 | "/// Restores the XMM, MMX, MXCSR and x87 FPU registers from the 512-byte\n" |
| 20131 | "/// memory region pointed to by the input parameter \\a __p. The contents of\n" |
| 20132 | "/// this memory region should have been written to by a previous \\c _fxsave\n" |
| 20133 | "/// or \\c _fxsave64 intrinsic.\n" |
| 20134 | "///\n" |
| 20135 | "/// \\headerfile <x86intrin.h>\n" |
| 20136 | "///\n" |
| 20137 | "/// This intrinsic corresponds to the <c> FXRSTOR </c> instruction.\n" |
| 20138 | "///\n" |
| 20139 | "/// \\param __p\n" |
| 20140 | "/// A pointer to a 512-byte memory region. The beginning of this memory\n" |
| 20141 | "/// region should be aligned on a 16-byte boundary.\n" |
| 20142 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 20143 | "_fxrstor(void *__p)\n" |
| 20144 | "{\n" |
| 20145 | " __builtin_ia32_fxrstor(__p);\n" |
| 20146 | "}\n" |
| 20147 | "\n" |
| 20148 | "#ifdef __x86_64__\n" |
| 20149 | "/// Saves the XMM, MMX, MXCSR and x87 FPU registers into a 512-byte\n" |
| 20150 | "/// memory region pointed to by the input parameter \\a __p.\n" |
| 20151 | "///\n" |
| 20152 | "/// \\headerfile <x86intrin.h>\n" |
| 20153 | "///\n" |
| 20154 | "/// This intrinsic corresponds to the <c> FXSAVE64 </c> instruction.\n" |
| 20155 | "///\n" |
| 20156 | "/// \\param __p\n" |
| 20157 | "/// A pointer to a 512-byte memory region. The beginning of this memory\n" |
| 20158 | "/// region should be aligned on a 16-byte boundary.\n" |
| 20159 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 20160 | "_fxsave64(void *__p)\n" |
| 20161 | "{\n" |
| 20162 | " __builtin_ia32_fxsave64(__p);\n" |
| 20163 | "}\n" |
| 20164 | "\n" |
| 20165 | "/// Restores the XMM, MMX, MXCSR and x87 FPU registers from the 512-byte\n" |
| 20166 | "/// memory region pointed to by the input parameter \\a __p. The contents of\n" |
| 20167 | "/// this memory region should have been written to by a previous \\c _fxsave\n" |
| 20168 | "/// or \\c _fxsave64 intrinsic.\n" |
| 20169 | "///\n" |
| 20170 | "/// \\headerfile <x86intrin.h>\n" |
| 20171 | "///\n" |
| 20172 | "/// This intrinsic corresponds to the <c> FXRSTOR64 </c> instruction.\n" |
| 20173 | "///\n" |
| 20174 | "/// \\param __p\n" |
| 20175 | "/// A pointer to a 512-byte memory region. The beginning of this memory\n" |
| 20176 | "/// region should be aligned on a 16-byte boundary.\n" |
| 20177 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 20178 | "_fxrstor64(void *__p)\n" |
| 20179 | "{\n" |
| 20180 | " __builtin_ia32_fxrstor64(__p);\n" |
| 20181 | "}\n" |
| 20182 | "#endif\n" |
| 20183 | "\n" |
| 20184 | "#undef __DEFAULT_FN_ATTRS\n" |
| 20185 | "\n" |
| 20186 | "#endif\n" |
| 20187 | "" } , |
| 20188 | { "/builtins/gfniintrin.h" , "/*===----------------- gfniintrin.h - GFNI intrinsics ----------------------===\n" |
| 20189 | " *\n" |
| 20190 | " *\n" |
| 20191 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 20192 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 20193 | " * in the Software without restriction, including without limitation the rights\n" |
| 20194 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 20195 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 20196 | " * furnished to do so, subject to the following conditions:\n" |
| 20197 | " *\n" |
| 20198 | " * The above copyright notice and this permission notice shall be included in\n" |
| 20199 | " * all copies or substantial portions of the Software.\n" |
| 20200 | " *\n" |
| 20201 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 20202 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 20203 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 20204 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 20205 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 20206 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 20207 | " * THE SOFTWARE.\n" |
| 20208 | " *\n" |
| 20209 | " *===-----------------------------------------------------------------------===\n" |
| 20210 | " */\n" |
| 20211 | "#ifndef __IMMINTRIN_H\n" |
| 20212 | "#error \"Never use <gfniintrin.h> directly; include <immintrin.h> instead.\"\n" |
| 20213 | "#endif\n" |
| 20214 | "\n" |
| 20215 | "#ifndef __GFNIINTRIN_H\n" |
| 20216 | "#define __GFNIINTRIN_H\n" |
| 20217 | "\n" |
| 20218 | "\n" |
| 20219 | "#define _mm_gf2p8affineinv_epi64_epi8(A, B, I) \\\n" |
| 20220 | " (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \\\n" |
| 20221 | " (__v16qi)(__m128i)(B), \\\n" |
| 20222 | " (char)(I))\n" |
| 20223 | "\n" |
| 20224 | "#define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \\\n" |
| 20225 | " (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \\\n" |
| 20226 | " (__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I), \\\n" |
| 20227 | " (__v16qi)(__m128i)(S))\n" |
| 20228 | "\n" |
| 20229 | "\n" |
| 20230 | "#define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \\\n" |
| 20231 | " (__m128i)_mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(), \\\n" |
| 20232 | " U, A, B, I)\n" |
| 20233 | "\n" |
| 20234 | "\n" |
| 20235 | "#define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) \\\n" |
| 20236 | " (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \\\n" |
| 20237 | " (__v32qi)(__m256i)(B), \\\n" |
| 20238 | " (char)(I))\n" |
| 20239 | "\n" |
| 20240 | "#define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \\\n" |
| 20241 | " (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \\\n" |
| 20242 | " (__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I), \\\n" |
| 20243 | " (__v32qi)(__m256i)(S))\n" |
| 20244 | "\n" |
| 20245 | "#define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \\\n" |
| 20246 | " (__m256i)_mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \\\n" |
| 20247 | " U, A, B, I)\n" |
| 20248 | "\n" |
| 20249 | "\n" |
| 20250 | "#define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) \\\n" |
| 20251 | " (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A), \\\n" |
| 20252 | " (__v64qi)(__m512i)(B), \\\n" |
| 20253 | " (char)(I))\n" |
| 20254 | "\n" |
| 20255 | "#define _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \\\n" |
| 20256 | " (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \\\n" |
| 20257 | " (__v64qi)_mm512_gf2p8affineinv_epi64_epi8(A, B, I), \\\n" |
| 20258 | " (__v64qi)(__m512i)(S))\n" |
| 20259 | "\n" |
| 20260 | "#define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \\\n" |
| 20261 | " (__m512i)_mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_si512(), \\\n" |
| 20262 | " U, A, B, I)\n" |
| 20263 | "\n" |
| 20264 | "#define _mm_gf2p8affine_epi64_epi8(A, B, I) \\\n" |
| 20265 | " (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \\\n" |
| 20266 | " (__v16qi)(__m128i)(B), \\\n" |
| 20267 | " (char)(I))\n" |
| 20268 | "\n" |
| 20269 | "#define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \\\n" |
| 20270 | " (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \\\n" |
| 20271 | " (__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I), \\\n" |
| 20272 | " (__v16qi)(__m128i)(S))\n" |
| 20273 | "\n" |
| 20274 | "\n" |
| 20275 | "#define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \\\n" |
| 20276 | " (__m128i)_mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(), \\\n" |
| 20277 | " U, A, B, I)\n" |
| 20278 | "\n" |
| 20279 | "\n" |
| 20280 | "#define _mm256_gf2p8affine_epi64_epi8(A, B, I) \\\n" |
| 20281 | " (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \\\n" |
| 20282 | " (__v32qi)(__m256i)(B), \\\n" |
| 20283 | " (char)(I))\n" |
| 20284 | "\n" |
| 20285 | "#define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \\\n" |
| 20286 | " (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \\\n" |
| 20287 | " (__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I), \\\n" |
| 20288 | " (__v32qi)(__m256i)(S))\n" |
| 20289 | "\n" |
| 20290 | "#define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \\\n" |
| 20291 | " (__m256i)_mm256_mask_gf2p8affine_epi64_epi8((__m256i)_mm256_setzero_si256(), \\\n" |
| 20292 | " U, A, B, I)\n" |
| 20293 | "\n" |
| 20294 | "\n" |
| 20295 | "#define _mm512_gf2p8affine_epi64_epi8(A, B, I) \\\n" |
| 20296 | " (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \\\n" |
| 20297 | " (__v64qi)(__m512i)(B), \\\n" |
| 20298 | " (char)(I))\n" |
| 20299 | "\n" |
| 20300 | "#define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \\\n" |
| 20301 | " (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \\\n" |
| 20302 | " (__v64qi)_mm512_gf2p8affine_epi64_epi8(A, B, I), \\\n" |
| 20303 | " (__v64qi)(__m512i)(S))\n" |
| 20304 | "\n" |
| 20305 | "#define _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \\\n" |
| 20306 | " (__m512i)_mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_si512(), \\\n" |
| 20307 | " U, A, B, I)\n" |
| 20308 | "\n" |
| 20309 | "/* Default attributes for simple form (no masking). */\n" |
| 20310 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"gfni\"), __min_vector_width__(128)))\n" |
| 20311 | "\n" |
| 20312 | "/* Default attributes for YMM unmasked form. */\n" |
| 20313 | "#define __DEFAULT_FN_ATTRS_Y __attribute__((__always_inline__, __nodebug__, __target__(\"avx,gfni\"), __min_vector_width__(256)))\n" |
| 20314 | "\n" |
| 20315 | "/* Default attributes for ZMM forms. */\n" |
| 20316 | "#define __DEFAULT_FN_ATTRS_Z __attribute__((__always_inline__, __nodebug__, __target__(\"avx512bw,gfni\"), __min_vector_width__(512)))\n" |
| 20317 | "\n" |
| 20318 | "/* Default attributes for VLX forms. */\n" |
| 20319 | "#define __DEFAULT_FN_ATTRS_VL128 __attribute__((__always_inline__, __nodebug__, __target__(\"avx512bw,avx512vl,gfni\"), __min_vector_width__(128)))\n" |
| 20320 | "#define __DEFAULT_FN_ATTRS_VL256 __attribute__((__always_inline__, __nodebug__, __target__(\"avx512bw,avx512vl,gfni\"), __min_vector_width__(256)))\n" |
| 20321 | "\n" |
| 20322 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 20323 | "_mm_gf2p8mul_epi8(__m128i __A, __m128i __B)\n" |
| 20324 | "{\n" |
| 20325 | " return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A,\n" |
| 20326 | " (__v16qi) __B);\n" |
| 20327 | "}\n" |
| 20328 | "\n" |
| 20329 | "static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128\n" |
| 20330 | "_mm_mask_gf2p8mul_epi8(__m128i __S, __mmask16 __U, __m128i __A, __m128i __B)\n" |
| 20331 | "{\n" |
| 20332 | " return (__m128i) __builtin_ia32_selectb_128(__U,\n" |
| 20333 | " (__v16qi) _mm_gf2p8mul_epi8(__A, __B),\n" |
| 20334 | " (__v16qi) __S);\n" |
| 20335 | "}\n" |
| 20336 | "\n" |
| 20337 | "static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128\n" |
| 20338 | "_mm_maskz_gf2p8mul_epi8(__mmask16 __U, __m128i __A, __m128i __B)\n" |
| 20339 | "{\n" |
| 20340 | " return _mm_mask_gf2p8mul_epi8((__m128i)_mm_setzero_si128(),\n" |
| 20341 | " __U, __A, __B);\n" |
| 20342 | "}\n" |
| 20343 | "\n" |
| 20344 | "static __inline__ __m256i __DEFAULT_FN_ATTRS_Y\n" |
| 20345 | "_mm256_gf2p8mul_epi8(__m256i __A, __m256i __B)\n" |
| 20346 | "{\n" |
| 20347 | " return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi((__v32qi) __A,\n" |
| 20348 | " (__v32qi) __B);\n" |
| 20349 | "}\n" |
| 20350 | "\n" |
| 20351 | "static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256\n" |
| 20352 | "_mm256_mask_gf2p8mul_epi8(__m256i __S, __mmask32 __U, __m256i __A, __m256i __B)\n" |
| 20353 | "{\n" |
| 20354 | " return (__m256i) __builtin_ia32_selectb_256(__U,\n" |
| 20355 | " (__v32qi) _mm256_gf2p8mul_epi8(__A, __B),\n" |
| 20356 | " (__v32qi) __S);\n" |
| 20357 | "}\n" |
| 20358 | "\n" |
| 20359 | "static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256\n" |
| 20360 | "_mm256_maskz_gf2p8mul_epi8(__mmask32 __U, __m256i __A, __m256i __B)\n" |
| 20361 | "{\n" |
| 20362 | " return _mm256_mask_gf2p8mul_epi8((__m256i)_mm256_setzero_si256(),\n" |
| 20363 | " __U, __A, __B);\n" |
| 20364 | "}\n" |
| 20365 | "\n" |
| 20366 | "static __inline__ __m512i __DEFAULT_FN_ATTRS_Z\n" |
| 20367 | "_mm512_gf2p8mul_epi8(__m512i __A, __m512i __B)\n" |
| 20368 | "{\n" |
| 20369 | " return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi((__v64qi) __A,\n" |
| 20370 | " (__v64qi) __B);\n" |
| 20371 | "}\n" |
| 20372 | "\n" |
| 20373 | "static __inline__ __m512i __DEFAULT_FN_ATTRS_Z\n" |
| 20374 | "_mm512_mask_gf2p8mul_epi8(__m512i __S, __mmask64 __U, __m512i __A, __m512i __B)\n" |
| 20375 | "{\n" |
| 20376 | " return (__m512i) __builtin_ia32_selectb_512(__U,\n" |
| 20377 | " (__v64qi) _mm512_gf2p8mul_epi8(__A, __B),\n" |
| 20378 | " (__v64qi) __S);\n" |
| 20379 | "}\n" |
| 20380 | "\n" |
| 20381 | "static __inline__ __m512i __DEFAULT_FN_ATTRS_Z\n" |
| 20382 | "_mm512_maskz_gf2p8mul_epi8(__mmask64 __U, __m512i __A, __m512i __B)\n" |
| 20383 | "{\n" |
| 20384 | " return _mm512_mask_gf2p8mul_epi8((__m512i)_mm512_setzero_si512(),\n" |
| 20385 | " __U, __A, __B);\n" |
| 20386 | "}\n" |
| 20387 | "\n" |
| 20388 | "#undef __DEFAULT_FN_ATTRS\n" |
| 20389 | "#undef __DEFAULT_FN_ATTRS_Y\n" |
| 20390 | "#undef __DEFAULT_FN_ATTRS_Z\n" |
| 20391 | "#undef __DEFAULT_FN_ATTRS_VL128\n" |
| 20392 | "#undef __DEFAULT_FN_ATTRS_VL256\n" |
| 20393 | "\n" |
| 20394 | "#endif /* __GFNIINTRIN_H */\n" |
| 20395 | "\n" |
| 20396 | "" } , |
| 20397 | { "/builtins/htmintrin.h" , "/*===---- htmintrin.h - Standard header for PowerPC HTM ---------------===*\\\n" |
| 20398 | " *\n" |
| 20399 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 20400 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 20401 | " * in the Software without restriction, including without limitation the rights\n" |
| 20402 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 20403 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 20404 | " * furnished to do so, subject to the following conditions:\n" |
| 20405 | " *\n" |
| 20406 | " * The above copyright notice and this permission notice shall be included in\n" |
| 20407 | " * all copies or substantial portions of the Software.\n" |
| 20408 | " *\n" |
| 20409 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 20410 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 20411 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 20412 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 20413 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 20414 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 20415 | " * THE SOFTWARE.\n" |
| 20416 | " *\n" |
| 20417 | "\\*===----------------------------------------------------------------------===*/\n" |
| 20418 | "\n" |
| 20419 | "#ifndef __HTMINTRIN_H\n" |
| 20420 | "#define __HTMINTRIN_H\n" |
| 20421 | "\n" |
| 20422 | "#ifndef __HTM__\n" |
| 20423 | "#error \"HTM instruction set not enabled\"\n" |
| 20424 | "#endif\n" |
| 20425 | "\n" |
| 20426 | "#ifdef __powerpc__\n" |
| 20427 | "\n" |
| 20428 | "#include <stdint.h>\n" |
| 20429 | "\n" |
| 20430 | "typedef uint64_t texasr_t;\n" |
| 20431 | "typedef uint32_t texasru_t;\n" |
| 20432 | "typedef uint32_t texasrl_t;\n" |
| 20433 | "typedef uintptr_t tfiar_t;\n" |
| 20434 | "typedef uintptr_t tfhar_t;\n" |
| 20435 | "\n" |
| 20436 | "#define _HTM_STATE(CR0) ((CR0 >> 1) & 0x3)\n" |
| 20437 | "#define _HTM_NONTRANSACTIONAL 0x0\n" |
| 20438 | "#define _HTM_SUSPENDED 0x1\n" |
| 20439 | "#define _HTM_TRANSACTIONAL 0x2\n" |
| 20440 | "\n" |
| 20441 | "#define _TEXASR_EXTRACT_BITS(TEXASR,BITNUM,SIZE) \\\n" |
| 20442 | " (((TEXASR) >> (63-(BITNUM))) & ((1<<(SIZE))-1))\n" |
| 20443 | "#define _TEXASRU_EXTRACT_BITS(TEXASR,BITNUM,SIZE) \\\n" |
| 20444 | " (((TEXASR) >> (31-(BITNUM))) & ((1<<(SIZE))-1))\n" |
| 20445 | "\n" |
| 20446 | "#define _TEXASR_FAILURE_CODE(TEXASR) \\\n" |
| 20447 | " _TEXASR_EXTRACT_BITS(TEXASR, 7, 8)\n" |
| 20448 | "#define _TEXASRU_FAILURE_CODE(TEXASRU) \\\n" |
| 20449 | " _TEXASRU_EXTRACT_BITS(TEXASRU, 7, 8)\n" |
| 20450 | "\n" |
| 20451 | "#define _TEXASR_FAILURE_PERSISTENT(TEXASR) \\\n" |
| 20452 | " _TEXASR_EXTRACT_BITS(TEXASR, 7, 1)\n" |
| 20453 | "#define _TEXASRU_FAILURE_PERSISTENT(TEXASRU) \\\n" |
| 20454 | " _TEXASRU_EXTRACT_BITS(TEXASRU, 7, 1)\n" |
| 20455 | "\n" |
| 20456 | "#define _TEXASR_DISALLOWED(TEXASR) \\\n" |
| 20457 | " _TEXASR_EXTRACT_BITS(TEXASR, 8, 1)\n" |
| 20458 | "#define _TEXASRU_DISALLOWED(TEXASRU) \\\n" |
| 20459 | " _TEXASRU_EXTRACT_BITS(TEXASRU, 8, 1)\n" |
| 20460 | "\n" |
| 20461 | "#define _TEXASR_NESTING_OVERFLOW(TEXASR) \\\n" |
| 20462 | " _TEXASR_EXTRACT_BITS(TEXASR, 9, 1)\n" |
| 20463 | "#define _TEXASRU_NESTING_OVERFLOW(TEXASRU) \\\n" |
| 20464 | " _TEXASRU_EXTRACT_BITS(TEXASRU, 9, 1)\n" |
| 20465 | "\n" |
| 20466 | "#define _TEXASR_FOOTPRINT_OVERFLOW(TEXASR) \\\n" |
| 20467 | " _TEXASR_EXTRACT_BITS(TEXASR, 10, 1)\n" |
| 20468 | "#define _TEXASRU_FOOTPRINT_OVERFLOW(TEXASRU) \\\n" |
| 20469 | " _TEXASRU_EXTRACT_BITS(TEXASRU, 10, 1)\n" |
| 20470 | "\n" |
| 20471 | "#define _TEXASR_SELF_INDUCED_CONFLICT(TEXASR) \\\n" |
| 20472 | " _TEXASR_EXTRACT_BITS(TEXASR, 11, 1)\n" |
| 20473 | "#define _TEXASRU_SELF_INDUCED_CONFLICT(TEXASRU) \\\n" |
| 20474 | " _TEXASRU_EXTRACT_BITS(TEXASRU, 11, 1)\n" |
| 20475 | "\n" |
| 20476 | "#define _TEXASR_NON_TRANSACTIONAL_CONFLICT(TEXASR) \\\n" |
| 20477 | " _TEXASR_EXTRACT_BITS(TEXASR, 12, 1)\n" |
| 20478 | "#define _TEXASRU_NON_TRANSACTIONAL_CONFLICT(TEXASRU) \\\n" |
| 20479 | " _TEXASRU_EXTRACT_BITS(TEXASRU, 12, 1)\n" |
| 20480 | "\n" |
| 20481 | "#define _TEXASR_TRANSACTION_CONFLICT(TEXASR) \\\n" |
| 20482 | " _TEXASR_EXTRACT_BITS(TEXASR, 13, 1)\n" |
| 20483 | "#define _TEXASRU_TRANSACTION_CONFLICT(TEXASRU) \\\n" |
| 20484 | " _TEXASRU_EXTRACT_BITS(TEXASRU, 13, 1)\n" |
| 20485 | "\n" |
| 20486 | "#define _TEXASR_TRANSLATION_INVALIDATION_CONFLICT(TEXASR) \\\n" |
| 20487 | " _TEXASR_EXTRACT_BITS(TEXASR, 14, 1)\n" |
| 20488 | "#define _TEXASRU_TRANSLATION_INVALIDATION_CONFLICT(TEXASRU) \\\n" |
| 20489 | " _TEXASRU_EXTRACT_BITS(TEXASRU, 14, 1)\n" |
| 20490 | "\n" |
| 20491 | "#define _TEXASR_IMPLEMENTAION_SPECIFIC(TEXASR) \\\n" |
| 20492 | " _TEXASR_EXTRACT_BITS(TEXASR, 15, 1)\n" |
| 20493 | "#define _TEXASRU_IMPLEMENTAION_SPECIFIC(TEXASRU) \\\n" |
| 20494 | " _TEXASRU_EXTRACT_BITS(TEXASRU, 15, 1)\n" |
| 20495 | "\n" |
| 20496 | "#define _TEXASR_INSTRUCTION_FETCH_CONFLICT(TEXASR) \\\n" |
| 20497 | " _TEXASR_EXTRACT_BITS(TEXASR, 16, 1)\n" |
| 20498 | "#define _TEXASRU_INSTRUCTION_FETCH_CONFLICT(TEXASRU) \\\n" |
| 20499 | " _TEXASRU_EXTRACT_BITS(TEXASRU, 16, 1)\n" |
| 20500 | "\n" |
| 20501 | "#define _TEXASR_ABORT(TEXASR) \\\n" |
| 20502 | " _TEXASR_EXTRACT_BITS(TEXASR, 31, 1)\n" |
| 20503 | "#define _TEXASRU_ABORT(TEXASRU) \\\n" |
| 20504 | " _TEXASRU_EXTRACT_BITS(TEXASRU, 31, 1)\n" |
| 20505 | "\n" |
| 20506 | "\n" |
| 20507 | "#define _TEXASR_SUSPENDED(TEXASR) \\\n" |
| 20508 | " _TEXASR_EXTRACT_BITS(TEXASR, 32, 1)\n" |
| 20509 | "\n" |
| 20510 | "#define _TEXASR_PRIVILEGE(TEXASR) \\\n" |
| 20511 | " _TEXASR_EXTRACT_BITS(TEXASR, 35, 2)\n" |
| 20512 | "\n" |
| 20513 | "#define _TEXASR_FAILURE_SUMMARY(TEXASR) \\\n" |
| 20514 | " _TEXASR_EXTRACT_BITS(TEXASR, 36, 1)\n" |
| 20515 | "\n" |
| 20516 | "#define _TEXASR_TFIAR_EXACT(TEXASR) \\\n" |
| 20517 | " _TEXASR_EXTRACT_BITS(TEXASR, 37, 1)\n" |
| 20518 | "\n" |
| 20519 | "#define _TEXASR_ROT(TEXASR) \\\n" |
| 20520 | " _TEXASR_EXTRACT_BITS(TEXASR, 38, 1)\n" |
| 20521 | "\n" |
| 20522 | "#define _TEXASR_TRANSACTION_LEVEL(TEXASR) \\\n" |
| 20523 | " _TEXASR_EXTRACT_BITS(TEXASR, 63, 12)\n" |
| 20524 | "\n" |
| 20525 | "#endif /* __powerpc */\n" |
| 20526 | "\n" |
| 20527 | "#ifdef __s390__\n" |
| 20528 | "\n" |
| 20529 | "/* Condition codes generated by tbegin */\n" |
| 20530 | "#define _HTM_TBEGIN_STARTED 0\n" |
| 20531 | "#define _HTM_TBEGIN_INDETERMINATE 1\n" |
| 20532 | "#define _HTM_TBEGIN_TRANSIENT 2\n" |
| 20533 | "#define _HTM_TBEGIN_PERSISTENT 3\n" |
| 20534 | "\n" |
| 20535 | "/* The abort codes below this threshold are reserved for machine use. */\n" |
| 20536 | "#define _HTM_FIRST_USER_ABORT_CODE 256\n" |
| 20537 | "\n" |
| 20538 | "/* The transaction diagnostic block is it is defined in the Principles\n" |
| 20539 | " of Operation chapter 5-91. */\n" |
| 20540 | "\n" |
| 20541 | "struct __htm_tdb {\n" |
| 20542 | " unsigned char format; /* 0 */\n" |
| 20543 | " unsigned char flags;\n" |
| 20544 | " unsigned char reserved1[4];\n" |
| 20545 | " unsigned short nesting_depth;\n" |
| 20546 | " unsigned long long abort_code; /* 8 */\n" |
| 20547 | " unsigned long long conflict_token; /* 16 */\n" |
| 20548 | " unsigned long long atia; /* 24 */\n" |
| 20549 | " unsigned char eaid; /* 32 */\n" |
| 20550 | " unsigned char dxc;\n" |
| 20551 | " unsigned char reserved2[2];\n" |
| 20552 | " unsigned int program_int_id;\n" |
| 20553 | " unsigned long long exception_id; /* 40 */\n" |
| 20554 | " unsigned long long bea; /* 48 */\n" |
| 20555 | " unsigned char reserved3[72]; /* 56 */\n" |
| 20556 | " unsigned long long gprs[16]; /* 128 */\n" |
| 20557 | "} __attribute__((__packed__, __aligned__ (8)));\n" |
| 20558 | "\n" |
| 20559 | "\n" |
| 20560 | "/* Helper intrinsics to retry tbegin in case of transient failure. */\n" |
| 20561 | "\n" |
| 20562 | "static __inline int __attribute__((__always_inline__, __nodebug__))\n" |
| 20563 | "__builtin_tbegin_retry_null (int __retry)\n" |
| 20564 | "{\n" |
| 20565 | " int cc, i = 0;\n" |
| 20566 | "\n" |
| 20567 | " while ((cc = __builtin_tbegin(0)) == _HTM_TBEGIN_TRANSIENT\n" |
| 20568 | " && i++ < __retry)\n" |
| 20569 | " __builtin_tx_assist(i);\n" |
| 20570 | "\n" |
| 20571 | " return cc;\n" |
| 20572 | "}\n" |
| 20573 | "\n" |
| 20574 | "static __inline int __attribute__((__always_inline__, __nodebug__))\n" |
| 20575 | "__builtin_tbegin_retry_tdb (void *__tdb, int __retry)\n" |
| 20576 | "{\n" |
| 20577 | " int cc, i = 0;\n" |
| 20578 | "\n" |
| 20579 | " while ((cc = __builtin_tbegin(__tdb)) == _HTM_TBEGIN_TRANSIENT\n" |
| 20580 | " && i++ < __retry)\n" |
| 20581 | " __builtin_tx_assist(i);\n" |
| 20582 | "\n" |
| 20583 | " return cc;\n" |
| 20584 | "}\n" |
| 20585 | "\n" |
| 20586 | "#define __builtin_tbegin_retry(tdb, retry) \\\n" |
| 20587 | " (__builtin_constant_p(tdb == 0) && tdb == 0 ? \\\n" |
| 20588 | " __builtin_tbegin_retry_null(retry) : \\\n" |
| 20589 | " __builtin_tbegin_retry_tdb(tdb, retry))\n" |
| 20590 | "\n" |
| 20591 | "static __inline int __attribute__((__always_inline__, __nodebug__))\n" |
| 20592 | "__builtin_tbegin_retry_nofloat_null (int __retry)\n" |
| 20593 | "{\n" |
| 20594 | " int cc, i = 0;\n" |
| 20595 | "\n" |
| 20596 | " while ((cc = __builtin_tbegin_nofloat(0)) == _HTM_TBEGIN_TRANSIENT\n" |
| 20597 | " && i++ < __retry)\n" |
| 20598 | " __builtin_tx_assist(i);\n" |
| 20599 | "\n" |
| 20600 | " return cc;\n" |
| 20601 | "}\n" |
| 20602 | "\n" |
| 20603 | "static __inline int __attribute__((__always_inline__, __nodebug__))\n" |
| 20604 | "__builtin_tbegin_retry_nofloat_tdb (void *__tdb, int __retry)\n" |
| 20605 | "{\n" |
| 20606 | " int cc, i = 0;\n" |
| 20607 | "\n" |
| 20608 | " while ((cc = __builtin_tbegin_nofloat(__tdb)) == _HTM_TBEGIN_TRANSIENT\n" |
| 20609 | " && i++ < __retry)\n" |
| 20610 | " __builtin_tx_assist(i);\n" |
| 20611 | "\n" |
| 20612 | " return cc;\n" |
| 20613 | "}\n" |
| 20614 | "\n" |
| 20615 | "#define __builtin_tbegin_retry_nofloat(tdb, retry) \\\n" |
| 20616 | " (__builtin_constant_p(tdb == 0) && tdb == 0 ? \\\n" |
| 20617 | " __builtin_tbegin_retry_nofloat_null(retry) : \\\n" |
| 20618 | " __builtin_tbegin_retry_nofloat_tdb(tdb, retry))\n" |
| 20619 | "\n" |
| 20620 | "#endif /* __s390__ */\n" |
| 20621 | "\n" |
| 20622 | "#endif /* __HTMINTRIN_H */\n" |
| 20623 | "" } , |
| 20624 | { "/builtins/htmxlintrin.h" , "/*===---- htmxlintrin.h - XL compiler HTM execution intrinsics-------------===*\\\n" |
| 20625 | " *\n" |
| 20626 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 20627 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 20628 | " * in the Software without restriction, including without limitation the rights\n" |
| 20629 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 20630 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 20631 | " * furnished to do so, subject to the following conditions:\n" |
| 20632 | " *\n" |
| 20633 | " * The above copyright notice and this permission notice shall be included in\n" |
| 20634 | " * all copies or substantial portions of the Software.\n" |
| 20635 | " *\n" |
| 20636 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 20637 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 20638 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 20639 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 20640 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 20641 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 20642 | " * THE SOFTWARE.\n" |
| 20643 | " *\n" |
| 20644 | "\\*===----------------------------------------------------------------------===*/\n" |
| 20645 | "\n" |
| 20646 | "#ifndef __HTMXLINTRIN_H\n" |
| 20647 | "#define __HTMXLINTRIN_H\n" |
| 20648 | "\n" |
| 20649 | "#ifndef __HTM__\n" |
| 20650 | "#error \"HTM instruction set not enabled\"\n" |
| 20651 | "#endif\n" |
| 20652 | "\n" |
| 20653 | "#include <htmintrin.h>\n" |
| 20654 | "\n" |
| 20655 | "#ifdef __powerpc__\n" |
| 20656 | "\n" |
| 20657 | "#ifdef __cplusplus\n" |
| 20658 | "extern \"C\" {\n" |
| 20659 | "#endif\n" |
| 20660 | "\n" |
| 20661 | "#define _TEXASR_PTR(TM_BUF) ((texasr_t *)((char *)(TM_BUF) + 0))\n" |
| 20662 | "#define _TEXASRU_PTR(TM_BUF) ((texasru_t *)((char *)(TM_BUF) + 0))\n" |
| 20663 | "#define _TEXASRL_PTR(TM_BUF) ((texasrl_t *)((char *)(TM_BUF) + 4))\n" |
| 20664 | "#define _TFIAR_PTR(TM_BUF) ((tfiar_t *)((char *)(TM_BUF) + 8))\n" |
| 20665 | "\n" |
| 20666 | "typedef char TM_buff_type[16];\n" |
| 20667 | "\n" |
| 20668 | "/* This macro can be used to determine whether a transaction was successfully\n" |
| 20669 | " started from the __TM_begin() and __TM_simple_begin() intrinsic functions\n" |
| 20670 | " below. */\n" |
| 20671 | "#define _HTM_TBEGIN_STARTED 1\n" |
| 20672 | "\n" |
| 20673 | "extern __inline long\n" |
| 20674 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
| 20675 | "__TM_simple_begin (void)\n" |
| 20676 | "{\n" |
| 20677 | " if (__builtin_expect (__builtin_tbegin (0), 1))\n" |
| 20678 | " return _HTM_TBEGIN_STARTED;\n" |
| 20679 | " return 0;\n" |
| 20680 | "}\n" |
| 20681 | "\n" |
| 20682 | "extern __inline long\n" |
| 20683 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
| 20684 | "__TM_begin (void* const __TM_buff)\n" |
| 20685 | "{\n" |
| 20686 | " *_TEXASRL_PTR (__TM_buff) = 0;\n" |
| 20687 | " if (__builtin_expect (__builtin_tbegin (0), 1))\n" |
| 20688 | " return _HTM_TBEGIN_STARTED;\n" |
| 20689 | "#ifdef __powerpc64__\n" |
| 20690 | " *_TEXASR_PTR (__TM_buff) = __builtin_get_texasr ();\n" |
| 20691 | "#else\n" |
| 20692 | " *_TEXASRU_PTR (__TM_buff) = __builtin_get_texasru ();\n" |
| 20693 | " *_TEXASRL_PTR (__TM_buff) = __builtin_get_texasr ();\n" |
| 20694 | "#endif\n" |
| 20695 | " *_TFIAR_PTR (__TM_buff) = __builtin_get_tfiar ();\n" |
| 20696 | " return 0;\n" |
| 20697 | "}\n" |
| 20698 | "\n" |
| 20699 | "extern __inline long\n" |
| 20700 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
| 20701 | "__TM_end (void)\n" |
| 20702 | "{\n" |
| 20703 | " if (__builtin_expect (__builtin_tend (0), 1))\n" |
| 20704 | " return 1;\n" |
| 20705 | " return 0;\n" |
| 20706 | "}\n" |
| 20707 | "\n" |
| 20708 | "extern __inline void\n" |
| 20709 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
| 20710 | "__TM_abort (void)\n" |
| 20711 | "{\n" |
| 20712 | " __builtin_tabort (0);\n" |
| 20713 | "}\n" |
| 20714 | "\n" |
| 20715 | "extern __inline void\n" |
| 20716 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
| 20717 | "__TM_named_abort (unsigned char const __code)\n" |
| 20718 | "{\n" |
| 20719 | " __builtin_tabort (__code);\n" |
| 20720 | "}\n" |
| 20721 | "\n" |
| 20722 | "extern __inline void\n" |
| 20723 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
| 20724 | "__TM_resume (void)\n" |
| 20725 | "{\n" |
| 20726 | " __builtin_tresume ();\n" |
| 20727 | "}\n" |
| 20728 | "\n" |
| 20729 | "extern __inline void\n" |
| 20730 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
| 20731 | "__TM_suspend (void)\n" |
| 20732 | "{\n" |
| 20733 | " __builtin_tsuspend ();\n" |
| 20734 | "}\n" |
| 20735 | "\n" |
| 20736 | "extern __inline long\n" |
| 20737 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
| 20738 | "__TM_is_user_abort (void* const __TM_buff)\n" |
| 20739 | "{\n" |
| 20740 | " texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n" |
| 20741 | " return _TEXASRU_ABORT (texasru);\n" |
| 20742 | "}\n" |
| 20743 | "\n" |
| 20744 | "extern __inline long\n" |
| 20745 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
| 20746 | "__TM_is_named_user_abort (void* const __TM_buff, unsigned char *__code)\n" |
| 20747 | "{\n" |
| 20748 | " texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n" |
| 20749 | "\n" |
| 20750 | " *__code = _TEXASRU_FAILURE_CODE (texasru);\n" |
| 20751 | " return _TEXASRU_ABORT (texasru);\n" |
| 20752 | "}\n" |
| 20753 | "\n" |
| 20754 | "extern __inline long\n" |
| 20755 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
| 20756 | "__TM_is_illegal (void* const __TM_buff)\n" |
| 20757 | "{\n" |
| 20758 | " texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n" |
| 20759 | " return _TEXASRU_DISALLOWED (texasru);\n" |
| 20760 | "}\n" |
| 20761 | "\n" |
| 20762 | "extern __inline long\n" |
| 20763 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
| 20764 | "__TM_is_footprint_exceeded (void* const __TM_buff)\n" |
| 20765 | "{\n" |
| 20766 | " texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n" |
| 20767 | " return _TEXASRU_FOOTPRINT_OVERFLOW (texasru);\n" |
| 20768 | "}\n" |
| 20769 | "\n" |
| 20770 | "extern __inline long\n" |
| 20771 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
| 20772 | "__TM_nesting_depth (void* const __TM_buff)\n" |
| 20773 | "{\n" |
| 20774 | " texasrl_t texasrl;\n" |
| 20775 | "\n" |
| 20776 | " if (_HTM_STATE (__builtin_ttest ()) == _HTM_NONTRANSACTIONAL)\n" |
| 20777 | " {\n" |
| 20778 | " texasrl = *_TEXASRL_PTR (__TM_buff);\n" |
| 20779 | " if (!_TEXASR_FAILURE_SUMMARY (texasrl))\n" |
| 20780 | " texasrl = 0;\n" |
| 20781 | " }\n" |
| 20782 | " else\n" |
| 20783 | " texasrl = (texasrl_t) __builtin_get_texasr ();\n" |
| 20784 | "\n" |
| 20785 | " return _TEXASR_TRANSACTION_LEVEL (texasrl);\n" |
| 20786 | "}\n" |
| 20787 | "\n" |
| 20788 | "extern __inline long\n" |
| 20789 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
| 20790 | "__TM_is_nested_too_deep(void* const __TM_buff)\n" |
| 20791 | "{\n" |
| 20792 | " texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n" |
| 20793 | " return _TEXASRU_NESTING_OVERFLOW (texasru);\n" |
| 20794 | "}\n" |
| 20795 | "\n" |
| 20796 | "extern __inline long\n" |
| 20797 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
| 20798 | "__TM_is_conflict(void* const __TM_buff)\n" |
| 20799 | "{\n" |
| 20800 | " texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n" |
| 20801 | " /* Return TEXASR bits 11 (Self-Induced Conflict) through\n" |
| 20802 | " 14 (Translation Invalidation Conflict). */\n" |
| 20803 | " return (_TEXASRU_EXTRACT_BITS (texasru, 14, 4)) ? 1 : 0;\n" |
| 20804 | "}\n" |
| 20805 | "\n" |
| 20806 | "extern __inline long\n" |
| 20807 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
| 20808 | "__TM_is_failure_persistent(void* const __TM_buff)\n" |
| 20809 | "{\n" |
| 20810 | " texasru_t texasru = *_TEXASRU_PTR (__TM_buff);\n" |
| 20811 | " return _TEXASRU_FAILURE_PERSISTENT (texasru);\n" |
| 20812 | "}\n" |
| 20813 | "\n" |
| 20814 | "extern __inline long\n" |
| 20815 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
| 20816 | "__TM_failure_address(void* const __TM_buff)\n" |
| 20817 | "{\n" |
| 20818 | " return *_TFIAR_PTR (__TM_buff);\n" |
| 20819 | "}\n" |
| 20820 | "\n" |
| 20821 | "extern __inline long long\n" |
| 20822 | "__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))\n" |
| 20823 | "__TM_failure_code(void* const __TM_buff)\n" |
| 20824 | "{\n" |
| 20825 | " return *_TEXASR_PTR (__TM_buff);\n" |
| 20826 | "}\n" |
| 20827 | "\n" |
| 20828 | "#ifdef __cplusplus\n" |
| 20829 | "}\n" |
| 20830 | "#endif\n" |
| 20831 | "\n" |
| 20832 | "#endif /* __powerpc__ */\n" |
| 20833 | "\n" |
| 20834 | "#ifdef __s390__\n" |
| 20835 | "\n" |
| 20836 | "#include <stdint.h>\n" |
| 20837 | "\n" |
| 20838 | "/* These intrinsics are being made available for compatibility with\n" |
| 20839 | " the IBM XL compiler. For documentation please see the \"z/OS XL\n" |
| 20840 | " C/C++ Programming Guide\" publicly available on the web. */\n" |
| 20841 | "\n" |
| 20842 | "static __inline long __attribute__((__always_inline__, __nodebug__))\n" |
| 20843 | "__TM_simple_begin ()\n" |
| 20844 | "{\n" |
| 20845 | " return __builtin_tbegin_nofloat (0);\n" |
| 20846 | "}\n" |
| 20847 | "\n" |
| 20848 | "static __inline long __attribute__((__always_inline__, __nodebug__))\n" |
| 20849 | "__TM_begin (void* const __tdb)\n" |
| 20850 | "{\n" |
| 20851 | " return __builtin_tbegin_nofloat (__tdb);\n" |
| 20852 | "}\n" |
| 20853 | "\n" |
| 20854 | "static __inline long __attribute__((__always_inline__, __nodebug__))\n" |
| 20855 | "__TM_end ()\n" |
| 20856 | "{\n" |
| 20857 | " return __builtin_tend ();\n" |
| 20858 | "}\n" |
| 20859 | "\n" |
| 20860 | "static __inline void __attribute__((__always_inline__))\n" |
| 20861 | "__TM_abort ()\n" |
| 20862 | "{\n" |
| 20863 | " return __builtin_tabort (_HTM_FIRST_USER_ABORT_CODE);\n" |
| 20864 | "}\n" |
| 20865 | "\n" |
| 20866 | "static __inline void __attribute__((__always_inline__, __nodebug__))\n" |
| 20867 | "__TM_named_abort (unsigned char const __code)\n" |
| 20868 | "{\n" |
| 20869 | " return __builtin_tabort ((int)_HTM_FIRST_USER_ABORT_CODE + __code);\n" |
| 20870 | "}\n" |
| 20871 | "\n" |
| 20872 | "static __inline void __attribute__((__always_inline__, __nodebug__))\n" |
| 20873 | "__TM_non_transactional_store (void* const __addr, long long const __value)\n" |
| 20874 | "{\n" |
| 20875 | " __builtin_non_tx_store ((uint64_t*)__addr, (uint64_t)__value);\n" |
| 20876 | "}\n" |
| 20877 | "\n" |
| 20878 | "static __inline long __attribute__((__always_inline__, __nodebug__))\n" |
| 20879 | "__TM_nesting_depth (void* const __tdb_ptr)\n" |
| 20880 | "{\n" |
| 20881 | " int depth = __builtin_tx_nesting_depth ();\n" |
| 20882 | " struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n" |
| 20883 | "\n" |
| 20884 | " if (depth != 0)\n" |
| 20885 | " return depth;\n" |
| 20886 | "\n" |
| 20887 | " if (tdb->format != 1)\n" |
| 20888 | " return 0;\n" |
| 20889 | " return tdb->nesting_depth;\n" |
| 20890 | "}\n" |
| 20891 | "\n" |
| 20892 | "/* Transaction failure diagnostics */\n" |
| 20893 | "\n" |
| 20894 | "static __inline long __attribute__((__always_inline__, __nodebug__))\n" |
| 20895 | "__TM_is_user_abort (void* const __tdb_ptr)\n" |
| 20896 | "{\n" |
| 20897 | " struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n" |
| 20898 | "\n" |
| 20899 | " if (tdb->format != 1)\n" |
| 20900 | " return 0;\n" |
| 20901 | "\n" |
| 20902 | " return !!(tdb->abort_code >= _HTM_FIRST_USER_ABORT_CODE);\n" |
| 20903 | "}\n" |
| 20904 | "\n" |
| 20905 | "static __inline long __attribute__((__always_inline__, __nodebug__))\n" |
| 20906 | "__TM_is_named_user_abort (void* const __tdb_ptr, unsigned char* __code)\n" |
| 20907 | "{\n" |
| 20908 | " struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n" |
| 20909 | "\n" |
| 20910 | " if (tdb->format != 1)\n" |
| 20911 | " return 0;\n" |
| 20912 | "\n" |
| 20913 | " if (tdb->abort_code >= _HTM_FIRST_USER_ABORT_CODE)\n" |
| 20914 | " {\n" |
| 20915 | " *__code = tdb->abort_code - _HTM_FIRST_USER_ABORT_CODE;\n" |
| 20916 | " return 1;\n" |
| 20917 | " }\n" |
| 20918 | " return 0;\n" |
| 20919 | "}\n" |
| 20920 | "\n" |
| 20921 | "static __inline long __attribute__((__always_inline__, __nodebug__))\n" |
| 20922 | "__TM_is_illegal (void* const __tdb_ptr)\n" |
| 20923 | "{\n" |
| 20924 | " struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n" |
| 20925 | "\n" |
| 20926 | " return (tdb->format == 1\n" |
| 20927 | " && (tdb->abort_code == 4 /* unfiltered program interruption */\n" |
| 20928 | " || tdb->abort_code == 11 /* restricted instruction */));\n" |
| 20929 | "}\n" |
| 20930 | "\n" |
| 20931 | "static __inline long __attribute__((__always_inline__, __nodebug__))\n" |
| 20932 | "__TM_is_footprint_exceeded (void* const __tdb_ptr)\n" |
| 20933 | "{\n" |
| 20934 | " struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n" |
| 20935 | "\n" |
| 20936 | " return (tdb->format == 1\n" |
| 20937 | " && (tdb->abort_code == 7 /* fetch overflow */\n" |
| 20938 | " || tdb->abort_code == 8 /* store overflow */));\n" |
| 20939 | "}\n" |
| 20940 | "\n" |
| 20941 | "static __inline long __attribute__((__always_inline__, __nodebug__))\n" |
| 20942 | "__TM_is_nested_too_deep (void* const __tdb_ptr)\n" |
| 20943 | "{\n" |
| 20944 | " struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n" |
| 20945 | "\n" |
| 20946 | " return tdb->format == 1 && tdb->abort_code == 13; /* depth exceeded */\n" |
| 20947 | "}\n" |
| 20948 | "\n" |
| 20949 | "static __inline long __attribute__((__always_inline__, __nodebug__))\n" |
| 20950 | "__TM_is_conflict (void* const __tdb_ptr)\n" |
| 20951 | "{\n" |
| 20952 | " struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n" |
| 20953 | "\n" |
| 20954 | " return (tdb->format == 1\n" |
| 20955 | " && (tdb->abort_code == 9 /* fetch conflict */\n" |
| 20956 | " || tdb->abort_code == 10 /* store conflict */));\n" |
| 20957 | "}\n" |
| 20958 | "\n" |
| 20959 | "static __inline long __attribute__((__always_inline__, __nodebug__))\n" |
| 20960 | "__TM_is_failure_persistent (long const __result)\n" |
| 20961 | "{\n" |
| 20962 | " return __result == _HTM_TBEGIN_PERSISTENT;\n" |
| 20963 | "}\n" |
| 20964 | "\n" |
| 20965 | "static __inline long __attribute__((__always_inline__, __nodebug__))\n" |
| 20966 | "__TM_failure_address (void* const __tdb_ptr)\n" |
| 20967 | "{\n" |
| 20968 | " struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n" |
| 20969 | " return tdb->atia;\n" |
| 20970 | "}\n" |
| 20971 | "\n" |
| 20972 | "static __inline long __attribute__((__always_inline__, __nodebug__))\n" |
| 20973 | "__TM_failure_code (void* const __tdb_ptr)\n" |
| 20974 | "{\n" |
| 20975 | " struct __htm_tdb *tdb = (struct __htm_tdb*)__tdb_ptr;\n" |
| 20976 | "\n" |
| 20977 | " return tdb->abort_code;\n" |
| 20978 | "}\n" |
| 20979 | "\n" |
| 20980 | "#endif /* __s390__ */\n" |
| 20981 | "\n" |
| 20982 | "#endif /* __HTMXLINTRIN_H */\n" |
| 20983 | "" } , |
| 20984 | { "/builtins/ia32intrin.h" , "/* ===-------- ia32intrin.h ---------------------------------------------------===\n" |
| 20985 | " *\n" |
| 20986 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 20987 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 20988 | " * in the Software without restriction, including without limitation the rights\n" |
| 20989 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 20990 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 20991 | " * furnished to do so, subject to the following conditions:\n" |
| 20992 | " *\n" |
| 20993 | " * The above copyright notice and this permission notice shall be included in\n" |
| 20994 | " * all copies or substantial portions of the Software.\n" |
| 20995 | " *\n" |
| 20996 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 20997 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 20998 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 20999 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 21000 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 21001 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 21002 | " * THE SOFTWARE.\n" |
| 21003 | " *\n" |
| 21004 | " *===-----------------------------------------------------------------------===\n" |
| 21005 | " */\n" |
| 21006 | "\n" |
| 21007 | "#ifndef __X86INTRIN_H\n" |
| 21008 | "#error \"Never use <ia32intrin.h> directly; include <x86intrin.h> instead.\"\n" |
| 21009 | "#endif\n" |
| 21010 | "\n" |
| 21011 | "#ifndef __IA32INTRIN_H\n" |
| 21012 | "#define __IA32INTRIN_H\n" |
| 21013 | "\n" |
| 21014 | "#ifdef __x86_64__\n" |
| 21015 | "static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))\n" |
| 21016 | "__readeflags(void)\n" |
| 21017 | "{\n" |
| 21018 | " return __builtin_ia32_readeflags_u64();\n" |
| 21019 | "}\n" |
| 21020 | "\n" |
| 21021 | "static __inline__ void __attribute__((__always_inline__, __nodebug__))\n" |
| 21022 | "__writeeflags(unsigned long long __f)\n" |
| 21023 | "{\n" |
| 21024 | " __builtin_ia32_writeeflags_u64(__f);\n" |
| 21025 | "}\n" |
| 21026 | "\n" |
| 21027 | "#else /* !__x86_64__ */\n" |
| 21028 | "static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))\n" |
| 21029 | "__readeflags(void)\n" |
| 21030 | "{\n" |
| 21031 | " return __builtin_ia32_readeflags_u32();\n" |
| 21032 | "}\n" |
| 21033 | "\n" |
| 21034 | "static __inline__ void __attribute__((__always_inline__, __nodebug__))\n" |
| 21035 | "__writeeflags(unsigned int __f)\n" |
| 21036 | "{\n" |
| 21037 | " __builtin_ia32_writeeflags_u32(__f);\n" |
| 21038 | "}\n" |
| 21039 | "#endif /* !__x86_64__ */\n" |
| 21040 | "\n" |
| 21041 | "static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))\n" |
| 21042 | "__rdpmc(int __A) {\n" |
| 21043 | " return __builtin_ia32_rdpmc(__A);\n" |
| 21044 | "}\n" |
| 21045 | "\n" |
| 21046 | "/* __rdtscp */\n" |
| 21047 | "static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))\n" |
| 21048 | "__rdtscp(unsigned int *__A) {\n" |
| 21049 | " return __builtin_ia32_rdtscp(__A);\n" |
| 21050 | "}\n" |
| 21051 | "\n" |
| 21052 | "#define _rdtsc() __rdtsc()\n" |
| 21053 | "\n" |
| 21054 | "#define _rdpmc(A) __rdpmc(A)\n" |
| 21055 | "\n" |
| 21056 | "static __inline__ void __attribute__((__always_inline__, __nodebug__))\n" |
| 21057 | "_wbinvd(void) {\n" |
| 21058 | " __builtin_ia32_wbinvd();\n" |
| 21059 | "}\n" |
| 21060 | "\n" |
| 21061 | "#endif /* __IA32INTRIN_H */\n" |
| 21062 | "" } , |
| 21063 | { "/builtins/immintrin.h" , "/*===---- immintrin.h - Intel intrinsics -----------------------------------===\n" |
| 21064 | " *\n" |
| 21065 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 21066 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 21067 | " * in the Software without restriction, including without limitation the rights\n" |
| 21068 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 21069 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 21070 | " * furnished to do so, subject to the following conditions:\n" |
| 21071 | " *\n" |
| 21072 | " * The above copyright notice and this permission notice shall be included in\n" |
| 21073 | " * all copies or substantial portions of the Software.\n" |
| 21074 | " *\n" |
| 21075 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 21076 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 21077 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 21078 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 21079 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 21080 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 21081 | " * THE SOFTWARE.\n" |
| 21082 | " *\n" |
| 21083 | " *===-----------------------------------------------------------------------===\n" |
| 21084 | " */\n" |
| 21085 | "\n" |
| 21086 | "#ifndef __IMMINTRIN_H\n" |
| 21087 | "#define __IMMINTRIN_H\n" |
| 21088 | "\n" |
| 21089 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__)\n" |
| 21090 | "#include <mmintrin.h>\n" |
| 21091 | "#endif\n" |
| 21092 | "\n" |
| 21093 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__)\n" |
| 21094 | "#include <xmmintrin.h>\n" |
| 21095 | "#endif\n" |
| 21096 | "\n" |
| 21097 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__)\n" |
| 21098 | "#include <emmintrin.h>\n" |
| 21099 | "#endif\n" |
| 21100 | "\n" |
| 21101 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__)\n" |
| 21102 | "#include <pmmintrin.h>\n" |
| 21103 | "#endif\n" |
| 21104 | "\n" |
| 21105 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__)\n" |
| 21106 | "#include <tmmintrin.h>\n" |
| 21107 | "#endif\n" |
| 21108 | "\n" |
| 21109 | "#if !defined(_MSC_VER) || __has_feature(modules) || \\\n" |
| 21110 | " (defined(__SSE4_2__) || defined(__SSE4_1__))\n" |
| 21111 | "#include <smmintrin.h>\n" |
| 21112 | "#endif\n" |
| 21113 | "\n" |
| 21114 | "#if !defined(_MSC_VER) || __has_feature(modules) || \\\n" |
| 21115 | " (defined(__AES__) || defined(__PCLMUL__))\n" |
| 21116 | "#include <wmmintrin.h>\n" |
| 21117 | "#endif\n" |
| 21118 | "\n" |
| 21119 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__)\n" |
| 21120 | "#include <clflushoptintrin.h>\n" |
| 21121 | "#endif\n" |
| 21122 | "\n" |
| 21123 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__)\n" |
| 21124 | "#include <clwbintrin.h>\n" |
| 21125 | "#endif\n" |
| 21126 | "\n" |
| 21127 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__)\n" |
| 21128 | "#include <avxintrin.h>\n" |
| 21129 | "#endif\n" |
| 21130 | "\n" |
| 21131 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__)\n" |
| 21132 | "#include <avx2intrin.h>\n" |
| 21133 | "#endif\n" |
| 21134 | "\n" |
| 21135 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__)\n" |
| 21136 | "#include <f16cintrin.h>\n" |
| 21137 | "#endif\n" |
| 21138 | "\n" |
| 21139 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__)\n" |
| 21140 | "#include <vpclmulqdqintrin.h>\n" |
| 21141 | "#endif\n" |
| 21142 | "\n" |
| 21143 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)\n" |
| 21144 | "#include <bmiintrin.h>\n" |
| 21145 | "#endif\n" |
| 21146 | "\n" |
| 21147 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)\n" |
| 21148 | "#include <bmi2intrin.h>\n" |
| 21149 | "#endif\n" |
| 21150 | "\n" |
| 21151 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__)\n" |
| 21152 | "#include <lzcntintrin.h>\n" |
| 21153 | "#endif\n" |
| 21154 | "\n" |
| 21155 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__POPCNT__)\n" |
| 21156 | "#include <popcntintrin.h>\n" |
| 21157 | "#endif\n" |
| 21158 | "\n" |
| 21159 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__)\n" |
| 21160 | "#include <fmaintrin.h>\n" |
| 21161 | "#endif\n" |
| 21162 | "\n" |
| 21163 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__)\n" |
| 21164 | "#include <avx512fintrin.h>\n" |
| 21165 | "#endif\n" |
| 21166 | "\n" |
| 21167 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__)\n" |
| 21168 | "#include <avx512vlintrin.h>\n" |
| 21169 | "#endif\n" |
| 21170 | "\n" |
| 21171 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__)\n" |
| 21172 | "#include <avx512bwintrin.h>\n" |
| 21173 | "#endif\n" |
| 21174 | "\n" |
| 21175 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__)\n" |
| 21176 | "#include <avx512bitalgintrin.h>\n" |
| 21177 | "#endif\n" |
| 21178 | "\n" |
| 21179 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)\n" |
| 21180 | "#include <avx512cdintrin.h>\n" |
| 21181 | "#endif\n" |
| 21182 | "\n" |
| 21183 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)\n" |
| 21184 | "#include <avx512vpopcntdqintrin.h>\n" |
| 21185 | "#endif\n" |
| 21186 | "\n" |
| 21187 | "#if !defined(_MSC_VER) || __has_feature(modules) || \\\n" |
| 21188 | " (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))\n" |
| 21189 | "#include <avx512vpopcntdqvlintrin.h>\n" |
| 21190 | "#endif\n" |
| 21191 | "\n" |
| 21192 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__)\n" |
| 21193 | "#include <avx512vnniintrin.h>\n" |
| 21194 | "#endif\n" |
| 21195 | "\n" |
| 21196 | "#if !defined(_MSC_VER) || __has_feature(modules) || \\\n" |
| 21197 | " (defined(__AVX512VL__) && defined(__AVX512VNNI__))\n" |
| 21198 | "#include <avx512vlvnniintrin.h>\n" |
| 21199 | "#endif\n" |
| 21200 | "\n" |
| 21201 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)\n" |
| 21202 | "#include <avx512dqintrin.h>\n" |
| 21203 | "#endif\n" |
| 21204 | "\n" |
| 21205 | "#if !defined(_MSC_VER) || __has_feature(modules) || \\\n" |
| 21206 | " (defined(__AVX512VL__) && defined(__AVX512BITALG__))\n" |
| 21207 | "#include <avx512vlbitalgintrin.h>\n" |
| 21208 | "#endif\n" |
| 21209 | "\n" |
| 21210 | "#if !defined(_MSC_VER) || __has_feature(modules) || \\\n" |
| 21211 | " (defined(__AVX512VL__) && defined(__AVX512BW__))\n" |
| 21212 | "#include <avx512vlbwintrin.h>\n" |
| 21213 | "#endif\n" |
| 21214 | "\n" |
| 21215 | "#if !defined(_MSC_VER) || __has_feature(modules) || \\\n" |
| 21216 | " (defined(__AVX512VL__) && defined(__AVX512CD__))\n" |
| 21217 | "#include <avx512vlcdintrin.h>\n" |
| 21218 | "#endif\n" |
| 21219 | "\n" |
| 21220 | "#if !defined(_MSC_VER) || __has_feature(modules) || \\\n" |
| 21221 | " (defined(__AVX512VL__) && defined(__AVX512DQ__))\n" |
| 21222 | "#include <avx512vldqintrin.h>\n" |
| 21223 | "#endif\n" |
| 21224 | "\n" |
| 21225 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__)\n" |
| 21226 | "#include <avx512erintrin.h>\n" |
| 21227 | "#endif\n" |
| 21228 | "\n" |
| 21229 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__)\n" |
| 21230 | "#include <avx512ifmaintrin.h>\n" |
| 21231 | "#endif\n" |
| 21232 | "\n" |
| 21233 | "#if !defined(_MSC_VER) || __has_feature(modules) || \\\n" |
| 21234 | " (defined(__AVX512IFMA__) && defined(__AVX512VL__))\n" |
| 21235 | "#include <avx512ifmavlintrin.h>\n" |
| 21236 | "#endif\n" |
| 21237 | "\n" |
| 21238 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__)\n" |
| 21239 | "#include <avx512vbmiintrin.h>\n" |
| 21240 | "#endif\n" |
| 21241 | "\n" |
| 21242 | "#if !defined(_MSC_VER) || __has_feature(modules) || \\\n" |
| 21243 | " (defined(__AVX512VBMI__) && defined(__AVX512VL__))\n" |
| 21244 | "#include <avx512vbmivlintrin.h>\n" |
| 21245 | "#endif\n" |
| 21246 | "\n" |
| 21247 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI2__)\n" |
| 21248 | "#include <avx512vbmi2intrin.h>\n" |
| 21249 | "#endif\n" |
| 21250 | "\n" |
| 21251 | "#if !defined(_MSC_VER) || __has_feature(modules) || \\\n" |
| 21252 | " (defined(__AVX512VBMI2__) && defined(__AVX512VL__))\n" |
| 21253 | "#include <avx512vlvbmi2intrin.h>\n" |
| 21254 | "#endif\n" |
| 21255 | "\n" |
| 21256 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)\n" |
| 21257 | "#include <avx512pfintrin.h>\n" |
| 21258 | "#endif\n" |
| 21259 | "\n" |
| 21260 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__)\n" |
| 21261 | "#include <pkuintrin.h>\n" |
| 21262 | "#endif\n" |
| 21263 | "\n" |
| 21264 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__)\n" |
| 21265 | "#include <vaesintrin.h>\n" |
| 21266 | "#endif\n" |
| 21267 | "\n" |
| 21268 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__)\n" |
| 21269 | "#include <gfniintrin.h>\n" |
| 21270 | "#endif\n" |
| 21271 | "\n" |
| 21272 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDPID__)\n" |
| 21273 | "/// Returns the value of the IA32_TSC_AUX MSR (0xc0000103).\n" |
| 21274 | "///\n" |
| 21275 | "/// \\headerfile <immintrin.h>\n" |
| 21276 | "///\n" |
| 21277 | "/// This intrinsic corresponds to the <c> RDPID </c> instruction.\n" |
| 21278 | "static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__(\"rdpid\")))\n" |
| 21279 | "_rdpid_u32(void) {\n" |
| 21280 | " return __builtin_ia32_rdpid();\n" |
| 21281 | "}\n" |
| 21282 | "#endif // __RDPID__\n" |
| 21283 | "\n" |
| 21284 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)\n" |
| 21285 | "static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__(\"rdrnd\")))\n" |
| 21286 | "_rdrand16_step(unsigned short *__p)\n" |
| 21287 | "{\n" |
| 21288 | " return __builtin_ia32_rdrand16_step(__p);\n" |
| 21289 | "}\n" |
| 21290 | "\n" |
| 21291 | "static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__(\"rdrnd\")))\n" |
| 21292 | "_rdrand32_step(unsigned int *__p)\n" |
| 21293 | "{\n" |
| 21294 | " return __builtin_ia32_rdrand32_step(__p);\n" |
| 21295 | "}\n" |
| 21296 | "\n" |
| 21297 | "#ifdef __x86_64__\n" |
| 21298 | "static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__(\"rdrnd\")))\n" |
| 21299 | "_rdrand64_step(unsigned long long *__p)\n" |
| 21300 | "{\n" |
| 21301 | " return __builtin_ia32_rdrand64_step(__p);\n" |
| 21302 | "}\n" |
| 21303 | "#endif\n" |
| 21304 | "#endif /* __RDRND__ */\n" |
| 21305 | "\n" |
| 21306 | "/* __bit_scan_forward */\n" |
| 21307 | "static __inline__ int __attribute__((__always_inline__, __nodebug__))\n" |
| 21308 | "_bit_scan_forward(int __A) {\n" |
| 21309 | " return __builtin_ctz(__A);\n" |
| 21310 | "}\n" |
| 21311 | "\n" |
| 21312 | "/* __bit_scan_reverse */\n" |
| 21313 | "static __inline__ int __attribute__((__always_inline__, __nodebug__))\n" |
| 21314 | "_bit_scan_reverse(int __A) {\n" |
| 21315 | " return 31 - __builtin_clz(__A);\n" |
| 21316 | "}\n" |
| 21317 | "\n" |
| 21318 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__)\n" |
| 21319 | "#ifdef __x86_64__\n" |
| 21320 | "static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n" |
| 21321 | "_readfsbase_u32(void)\n" |
| 21322 | "{\n" |
| 21323 | " return __builtin_ia32_rdfsbase32();\n" |
| 21324 | "}\n" |
| 21325 | "\n" |
| 21326 | "static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n" |
| 21327 | "_readfsbase_u64(void)\n" |
| 21328 | "{\n" |
| 21329 | " return __builtin_ia32_rdfsbase64();\n" |
| 21330 | "}\n" |
| 21331 | "\n" |
| 21332 | "static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n" |
| 21333 | "_readgsbase_u32(void)\n" |
| 21334 | "{\n" |
| 21335 | " return __builtin_ia32_rdgsbase32();\n" |
| 21336 | "}\n" |
| 21337 | "\n" |
| 21338 | "static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n" |
| 21339 | "_readgsbase_u64(void)\n" |
| 21340 | "{\n" |
| 21341 | " return __builtin_ia32_rdgsbase64();\n" |
| 21342 | "}\n" |
| 21343 | "\n" |
| 21344 | "static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n" |
| 21345 | "_writefsbase_u32(unsigned int __V)\n" |
| 21346 | "{\n" |
| 21347 | " __builtin_ia32_wrfsbase32(__V);\n" |
| 21348 | "}\n" |
| 21349 | "\n" |
| 21350 | "static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n" |
| 21351 | "_writefsbase_u64(unsigned long long __V)\n" |
| 21352 | "{\n" |
| 21353 | " __builtin_ia32_wrfsbase64(__V);\n" |
| 21354 | "}\n" |
| 21355 | "\n" |
| 21356 | "static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n" |
| 21357 | "_writegsbase_u32(unsigned int __V)\n" |
| 21358 | "{\n" |
| 21359 | " __builtin_ia32_wrgsbase32(__V);\n" |
| 21360 | "}\n" |
| 21361 | "\n" |
| 21362 | "static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"fsgsbase\")))\n" |
| 21363 | "_writegsbase_u64(unsigned long long __V)\n" |
| 21364 | "{\n" |
| 21365 | " __builtin_ia32_wrgsbase64(__V);\n" |
| 21366 | "}\n" |
| 21367 | "\n" |
| 21368 | "#endif\n" |
| 21369 | "#endif /* __FSGSBASE__ */\n" |
| 21370 | "\n" |
| 21371 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__)\n" |
| 21372 | "#include <rtmintrin.h>\n" |
| 21373 | "#include <xtestintrin.h>\n" |
| 21374 | "#endif\n" |
| 21375 | "\n" |
| 21376 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__)\n" |
| 21377 | "#include <shaintrin.h>\n" |
| 21378 | "#endif\n" |
| 21379 | "\n" |
| 21380 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__)\n" |
| 21381 | "#include <fxsrintrin.h>\n" |
| 21382 | "#endif\n" |
| 21383 | "\n" |
| 21384 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVE__)\n" |
| 21385 | "#include <xsaveintrin.h>\n" |
| 21386 | "#endif\n" |
| 21387 | "\n" |
| 21388 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__)\n" |
| 21389 | "#include <xsaveoptintrin.h>\n" |
| 21390 | "#endif\n" |
| 21391 | "\n" |
| 21392 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__)\n" |
| 21393 | "#include <xsavecintrin.h>\n" |
| 21394 | "#endif\n" |
| 21395 | "\n" |
| 21396 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__)\n" |
| 21397 | "#include <xsavesintrin.h>\n" |
| 21398 | "#endif\n" |
| 21399 | "\n" |
| 21400 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHSTK__)\n" |
| 21401 | "#include <cetintrin.h>\n" |
| 21402 | "#endif\n" |
| 21403 | "\n" |
| 21404 | "/* Some intrinsics inside adxintrin.h are available only on processors with ADX,\n" |
| 21405 | " * whereas others are also available at all times. */\n" |
| 21406 | "#include <adxintrin.h>\n" |
| 21407 | "\n" |
| 21408 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDSEED__)\n" |
| 21409 | "#include <rdseedintrin.h>\n" |
| 21410 | "#endif\n" |
| 21411 | "\n" |
| 21412 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WBNOINVD__)\n" |
| 21413 | "#include <wbnoinvdintrin.h>\n" |
| 21414 | "#endif\n" |
| 21415 | "\n" |
| 21416 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLDEMOTE__)\n" |
| 21417 | "#include <cldemoteintrin.h>\n" |
| 21418 | "#endif\n" |
| 21419 | "\n" |
| 21420 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WAITPKG__)\n" |
| 21421 | "#include <waitpkgintrin.h>\n" |
| 21422 | "#endif\n" |
| 21423 | "\n" |
| 21424 | "#if !defined(_MSC_VER) || __has_feature(modules) || \\\n" |
| 21425 | " defined(__MOVDIRI__) || defined(__MOVDIR64B__)\n" |
| 21426 | "#include <movdirintrin.h>\n" |
| 21427 | "#endif\n" |
| 21428 | "\n" |
| 21429 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PCONFIG__)\n" |
| 21430 | "#include <pconfigintrin.h>\n" |
| 21431 | "#endif\n" |
| 21432 | "\n" |
| 21433 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SGX__)\n" |
| 21434 | "#include <sgxintrin.h>\n" |
| 21435 | "#endif\n" |
| 21436 | "\n" |
| 21437 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PTWRITE__)\n" |
| 21438 | "#include <ptwriteintrin.h>\n" |
| 21439 | "#endif\n" |
| 21440 | "\n" |
| 21441 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__INVPCID__)\n" |
| 21442 | "#include <invpcidintrin.h>\n" |
| 21443 | "#endif\n" |
| 21444 | "\n" |
| 21445 | "#ifdef _MSC_VER\n" |
| 21446 | "/* Define the default attributes for these intrinsics */\n" |
| 21447 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))\n" |
| 21448 | "#ifdef __cplusplus\n" |
| 21449 | "extern \"C\" {\n" |
| 21450 | "#endif\n" |
| 21451 | "/*----------------------------------------------------------------------------*\\\n" |
| 21452 | "|* Interlocked Exchange HLE\n" |
| 21453 | "\\*----------------------------------------------------------------------------*/\n" |
| 21454 | "#if defined(__i386__) || defined(__x86_64__)\n" |
| 21455 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
| 21456 | "_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {\n" |
| 21457 | " __asm__ __volatile__(\".byte 0xf2 ; lock ; xchg %0, %1\"\n" |
| 21458 | " : \"+r\" (_Value), \"+m\" (*_Target) :: \"memory\");\n" |
| 21459 | " return _Value;\n" |
| 21460 | "}\n" |
| 21461 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
| 21462 | "_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {\n" |
| 21463 | " __asm__ __volatile__(\".byte 0xf3 ; lock ; xchg %0, %1\"\n" |
| 21464 | " : \"+r\" (_Value), \"+m\" (*_Target) :: \"memory\");\n" |
| 21465 | " return _Value;\n" |
| 21466 | "}\n" |
| 21467 | "#endif\n" |
| 21468 | "#if defined(__x86_64__)\n" |
| 21469 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
| 21470 | "_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {\n" |
| 21471 | " __asm__ __volatile__(\".byte 0xf2 ; lock ; xchg %0, %1\"\n" |
| 21472 | " : \"+r\" (_Value), \"+m\" (*_Target) :: \"memory\");\n" |
| 21473 | " return _Value;\n" |
| 21474 | "}\n" |
| 21475 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
| 21476 | "_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {\n" |
| 21477 | " __asm__ __volatile__(\".byte 0xf3 ; lock ; xchg %0, %1\"\n" |
| 21478 | " : \"+r\" (_Value), \"+m\" (*_Target) :: \"memory\");\n" |
| 21479 | " return _Value;\n" |
| 21480 | "}\n" |
| 21481 | "#endif\n" |
| 21482 | "/*----------------------------------------------------------------------------*\\\n" |
| 21483 | "|* Interlocked Compare Exchange HLE\n" |
| 21484 | "\\*----------------------------------------------------------------------------*/\n" |
| 21485 | "#if defined(__i386__) || defined(__x86_64__)\n" |
| 21486 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
| 21487 | "_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,\n" |
| 21488 | " long _Exchange, long _Comparand) {\n" |
| 21489 | " __asm__ __volatile__(\".byte 0xf2 ; lock ; cmpxchg %2, %1\"\n" |
| 21490 | " : \"+a\" (_Comparand), \"+m\" (*_Destination)\n" |
| 21491 | " : \"r\" (_Exchange) : \"memory\");\n" |
| 21492 | " return _Comparand;\n" |
| 21493 | "}\n" |
| 21494 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
| 21495 | "_InterlockedCompareExchange_HLERelease(long volatile *_Destination,\n" |
| 21496 | " long _Exchange, long _Comparand) {\n" |
| 21497 | " __asm__ __volatile__(\".byte 0xf3 ; lock ; cmpxchg %2, %1\"\n" |
| 21498 | " : \"+a\" (_Comparand), \"+m\" (*_Destination)\n" |
| 21499 | " : \"r\" (_Exchange) : \"memory\");\n" |
| 21500 | " return _Comparand;\n" |
| 21501 | "}\n" |
| 21502 | "#endif\n" |
| 21503 | "#if defined(__x86_64__)\n" |
| 21504 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
| 21505 | "_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,\n" |
| 21506 | " __int64 _Exchange, __int64 _Comparand) {\n" |
| 21507 | " __asm__ __volatile__(\".byte 0xf2 ; lock ; cmpxchg %2, %1\"\n" |
| 21508 | " : \"+a\" (_Comparand), \"+m\" (*_Destination)\n" |
| 21509 | " : \"r\" (_Exchange) : \"memory\");\n" |
| 21510 | " return _Comparand;\n" |
| 21511 | "}\n" |
| 21512 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
| 21513 | "_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,\n" |
| 21514 | " __int64 _Exchange, __int64 _Comparand) {\n" |
| 21515 | " __asm__ __volatile__(\".byte 0xf3 ; lock ; cmpxchg %2, %1\"\n" |
| 21516 | " : \"+a\" (_Comparand), \"+m\" (*_Destination)\n" |
| 21517 | " : \"r\" (_Exchange) : \"memory\");\n" |
| 21518 | " return _Comparand;\n" |
| 21519 | "}\n" |
| 21520 | "#endif\n" |
| 21521 | "#ifdef __cplusplus\n" |
| 21522 | "}\n" |
| 21523 | "#endif\n" |
| 21524 | "\n" |
| 21525 | "#undef __DEFAULT_FN_ATTRS\n" |
| 21526 | "\n" |
| 21527 | "#endif /* _MSC_VER */\n" |
| 21528 | "\n" |
| 21529 | "#endif /* __IMMINTRIN_H */\n" |
| 21530 | "" } , |
| 21531 | { "/builtins/intrin.h" , "/* ===-------- intrin.h ---------------------------------------------------===\n" |
| 21532 | " *\n" |
| 21533 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 21534 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 21535 | " * in the Software without restriction, including without limitation the rights\n" |
| 21536 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 21537 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 21538 | " * furnished to do so, subject to the following conditions:\n" |
| 21539 | " *\n" |
| 21540 | " * The above copyright notice and this permission notice shall be included in\n" |
| 21541 | " * all copies or substantial portions of the Software.\n" |
| 21542 | " *\n" |
| 21543 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 21544 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 21545 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 21546 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 21547 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 21548 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 21549 | " * THE SOFTWARE.\n" |
| 21550 | " *\n" |
| 21551 | " *===-----------------------------------------------------------------------===\n" |
| 21552 | " */\n" |
| 21553 | "\n" |
| 21554 | "/* Only include this if we're compiling for the windows platform. */\n" |
| 21555 | "#ifndef _MSC_VER\n" |
| 21556 | "#include_next <intrin.h>\n" |
| 21557 | "#else\n" |
| 21558 | "\n" |
| 21559 | "#ifndef __INTRIN_H\n" |
| 21560 | "#define __INTRIN_H\n" |
| 21561 | "\n" |
| 21562 | "/* First include the standard intrinsics. */\n" |
| 21563 | "#if defined(__i386__) || defined(__x86_64__)\n" |
| 21564 | "#include <x86intrin.h>\n" |
| 21565 | "#endif\n" |
| 21566 | "\n" |
| 21567 | "#if defined(__arm__)\n" |
| 21568 | "#include <armintr.h>\n" |
| 21569 | "#endif\n" |
| 21570 | "\n" |
| 21571 | "#if defined(__aarch64__)\n" |
| 21572 | "#include <arm64intr.h>\n" |
| 21573 | "#endif\n" |
| 21574 | "\n" |
| 21575 | "/* For the definition of jmp_buf. */\n" |
| 21576 | "#if __STDC_HOSTED__\n" |
| 21577 | "#include <setjmp.h>\n" |
| 21578 | "#endif\n" |
| 21579 | "\n" |
| 21580 | "/* Define the default attributes for the functions in this file. */\n" |
| 21581 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))\n" |
| 21582 | "\n" |
| 21583 | "#ifdef __cplusplus\n" |
| 21584 | "extern \"C\" {\n" |
| 21585 | "#endif\n" |
| 21586 | "\n" |
| 21587 | "#if defined(__MMX__)\n" |
| 21588 | "/* And the random ones that aren't in those files. */\n" |
| 21589 | "__m64 _m_from_float(float);\n" |
| 21590 | "float _m_to_float(__m64);\n" |
| 21591 | "#endif\n" |
| 21592 | "\n" |
| 21593 | "/* Other assorted instruction intrinsics. */\n" |
| 21594 | "void __addfsbyte(unsigned long, unsigned char);\n" |
| 21595 | "void __addfsdword(unsigned long, unsigned long);\n" |
| 21596 | "void __addfsword(unsigned long, unsigned short);\n" |
| 21597 | "void __code_seg(const char *);\n" |
| 21598 | "static __inline__\n" |
| 21599 | "void __cpuid(int[4], int);\n" |
| 21600 | "static __inline__\n" |
| 21601 | "void __cpuidex(int[4], int, int);\n" |
| 21602 | "static __inline__\n" |
| 21603 | "__int64 __emul(int, int);\n" |
| 21604 | "static __inline__\n" |
| 21605 | "unsigned __int64 __emulu(unsigned int, unsigned int);\n" |
| 21606 | "unsigned int __getcallerseflags(void);\n" |
| 21607 | "static __inline__\n" |
| 21608 | "void __halt(void);\n" |
| 21609 | "unsigned char __inbyte(unsigned short);\n" |
| 21610 | "void __inbytestring(unsigned short, unsigned char *, unsigned long);\n" |
| 21611 | "void __incfsbyte(unsigned long);\n" |
| 21612 | "void __incfsdword(unsigned long);\n" |
| 21613 | "void __incfsword(unsigned long);\n" |
| 21614 | "unsigned long __indword(unsigned short);\n" |
| 21615 | "void __indwordstring(unsigned short, unsigned long *, unsigned long);\n" |
| 21616 | "void __int2c(void);\n" |
| 21617 | "void __invlpg(void *);\n" |
| 21618 | "unsigned short __inword(unsigned short);\n" |
| 21619 | "void __inwordstring(unsigned short, unsigned short *, unsigned long);\n" |
| 21620 | "void __lidt(void *);\n" |
| 21621 | "unsigned __int64 __ll_lshift(unsigned __int64, int);\n" |
| 21622 | "__int64 __ll_rshift(__int64, int);\n" |
| 21623 | "unsigned int __lzcnt(unsigned int);\n" |
| 21624 | "unsigned short __lzcnt16(unsigned short);\n" |
| 21625 | "static __inline__\n" |
| 21626 | "void __movsb(unsigned char *, unsigned char const *, size_t);\n" |
| 21627 | "static __inline__\n" |
| 21628 | "void __movsd(unsigned long *, unsigned long const *, size_t);\n" |
| 21629 | "static __inline__\n" |
| 21630 | "void __movsw(unsigned short *, unsigned short const *, size_t);\n" |
| 21631 | "static __inline__\n" |
| 21632 | "void __nop(void);\n" |
| 21633 | "void __nvreg_restore_fence(void);\n" |
| 21634 | "void __nvreg_save_fence(void);\n" |
| 21635 | "void __outbyte(unsigned short, unsigned char);\n" |
| 21636 | "void __outbytestring(unsigned short, unsigned char *, unsigned long);\n" |
| 21637 | "void __outdword(unsigned short, unsigned long);\n" |
| 21638 | "void __outdwordstring(unsigned short, unsigned long *, unsigned long);\n" |
| 21639 | "void __outword(unsigned short, unsigned short);\n" |
| 21640 | "void __outwordstring(unsigned short, unsigned short *, unsigned long);\n" |
| 21641 | "unsigned long __readcr0(void);\n" |
| 21642 | "unsigned long __readcr2(void);\n" |
| 21643 | "static __inline__\n" |
| 21644 | "unsigned long __readcr3(void);\n" |
| 21645 | "unsigned long __readcr4(void);\n" |
| 21646 | "unsigned long __readcr8(void);\n" |
| 21647 | "unsigned int __readdr(unsigned int);\n" |
| 21648 | "#ifdef __i386__\n" |
| 21649 | "static __inline__\n" |
| 21650 | "unsigned char __readfsbyte(unsigned long);\n" |
| 21651 | "static __inline__\n" |
| 21652 | "unsigned __int64 __readfsqword(unsigned long);\n" |
| 21653 | "static __inline__\n" |
| 21654 | "unsigned short __readfsword(unsigned long);\n" |
| 21655 | "#endif\n" |
| 21656 | "static __inline__\n" |
| 21657 | "unsigned __int64 __readmsr(unsigned long);\n" |
| 21658 | "unsigned __int64 __readpmc(unsigned long);\n" |
| 21659 | "unsigned long __segmentlimit(unsigned long);\n" |
| 21660 | "void __sidt(void *);\n" |
| 21661 | "static __inline__\n" |
| 21662 | "void __stosb(unsigned char *, unsigned char, size_t);\n" |
| 21663 | "static __inline__\n" |
| 21664 | "void __stosd(unsigned long *, unsigned long, size_t);\n" |
| 21665 | "static __inline__\n" |
| 21666 | "void __stosw(unsigned short *, unsigned short, size_t);\n" |
| 21667 | "void __svm_clgi(void);\n" |
| 21668 | "void __svm_invlpga(void *, int);\n" |
| 21669 | "void __svm_skinit(int);\n" |
| 21670 | "void __svm_stgi(void);\n" |
| 21671 | "void __svm_vmload(size_t);\n" |
| 21672 | "void __svm_vmrun(size_t);\n" |
| 21673 | "void __svm_vmsave(size_t);\n" |
| 21674 | "void __ud2(void);\n" |
| 21675 | "unsigned __int64 __ull_rshift(unsigned __int64, int);\n" |
| 21676 | "void __vmx_off(void);\n" |
| 21677 | "void __vmx_vmptrst(unsigned __int64 *);\n" |
| 21678 | "void __wbinvd(void);\n" |
| 21679 | "void __writecr0(unsigned int);\n" |
| 21680 | "static __inline__\n" |
| 21681 | "void __writecr3(unsigned int);\n" |
| 21682 | "void __writecr4(unsigned int);\n" |
| 21683 | "void __writecr8(unsigned int);\n" |
| 21684 | "void __writedr(unsigned int, unsigned int);\n" |
| 21685 | "void __writefsbyte(unsigned long, unsigned char);\n" |
| 21686 | "void __writefsdword(unsigned long, unsigned long);\n" |
| 21687 | "void __writefsqword(unsigned long, unsigned __int64);\n" |
| 21688 | "void __writefsword(unsigned long, unsigned short);\n" |
| 21689 | "void __writemsr(unsigned long, unsigned __int64);\n" |
| 21690 | "static __inline__\n" |
| 21691 | "void *_AddressOfReturnAddress(void);\n" |
| 21692 | "static __inline__\n" |
| 21693 | "unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);\n" |
| 21694 | "static __inline__\n" |
| 21695 | "unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);\n" |
| 21696 | "unsigned char _bittest(long const *, long);\n" |
| 21697 | "unsigned char _bittestandcomplement(long *, long);\n" |
| 21698 | "unsigned char _bittestandreset(long *, long);\n" |
| 21699 | "unsigned char _bittestandset(long *, long);\n" |
| 21700 | "void __cdecl _disable(void);\n" |
| 21701 | "void __cdecl _enable(void);\n" |
| 21702 | "long _InterlockedAddLargeStatistic(__int64 volatile *_Addend, long _Value);\n" |
| 21703 | "unsigned char _interlockedbittestandreset(long volatile *, long);\n" |
| 21704 | "unsigned char _interlockedbittestandset(long volatile *, long);\n" |
| 21705 | "void *_InterlockedCompareExchangePointer_HLEAcquire(void *volatile *, void *,\n" |
| 21706 | " void *);\n" |
| 21707 | "void *_InterlockedCompareExchangePointer_HLERelease(void *volatile *, void *,\n" |
| 21708 | " void *);\n" |
| 21709 | "long _InterlockedExchangeAdd_HLEAcquire(long volatile *, long);\n" |
| 21710 | "long _InterlockedExchangeAdd_HLERelease(long volatile *, long);\n" |
| 21711 | "__int64 _InterlockedExchangeAdd64_HLEAcquire(__int64 volatile *, __int64);\n" |
| 21712 | "__int64 _InterlockedExchangeAdd64_HLERelease(__int64 volatile *, __int64);\n" |
| 21713 | "void __cdecl _invpcid(unsigned int, void *);\n" |
| 21714 | "static __inline__ void\n" |
| 21715 | "__attribute__((__deprecated__(\"use other intrinsics or C++11 atomics instead\")))\n" |
| 21716 | "_ReadBarrier(void);\n" |
| 21717 | "static __inline__ void\n" |
| 21718 | "__attribute__((__deprecated__(\"use other intrinsics or C++11 atomics instead\")))\n" |
| 21719 | "_ReadWriteBarrier(void);\n" |
| 21720 | "unsigned int _rorx_u32(unsigned int, const unsigned int);\n" |
| 21721 | "int _sarx_i32(int, unsigned int);\n" |
| 21722 | "#if __STDC_HOSTED__\n" |
| 21723 | "int __cdecl _setjmp(jmp_buf);\n" |
| 21724 | "#endif\n" |
| 21725 | "unsigned int _shlx_u32(unsigned int, unsigned int);\n" |
| 21726 | "unsigned int _shrx_u32(unsigned int, unsigned int);\n" |
| 21727 | "void _Store_HLERelease(long volatile *, long);\n" |
| 21728 | "void _Store64_HLERelease(__int64 volatile *, __int64);\n" |
| 21729 | "void _StorePointer_HLERelease(void *volatile *, void *);\n" |
| 21730 | "static __inline__ void\n" |
| 21731 | "__attribute__((__deprecated__(\"use other intrinsics or C++11 atomics instead\")))\n" |
| 21732 | "_WriteBarrier(void);\n" |
| 21733 | "unsigned __int32 xbegin(void);\n" |
| 21734 | "void _xend(void);\n" |
| 21735 | "static __inline__\n" |
| 21736 | "#define _XCR_XFEATURE_ENABLED_MASK 0\n" |
| 21737 | "unsigned __int64 __cdecl _xgetbv(unsigned int);\n" |
| 21738 | "void __cdecl _xsetbv(unsigned int, unsigned __int64);\n" |
| 21739 | "\n" |
| 21740 | "/* These additional intrinsics are turned on in x64/amd64/x86_64 mode. */\n" |
| 21741 | "#ifdef __x86_64__\n" |
| 21742 | "void __addgsbyte(unsigned long, unsigned char);\n" |
| 21743 | "void __addgsdword(unsigned long, unsigned long);\n" |
| 21744 | "void __addgsqword(unsigned long, unsigned __int64);\n" |
| 21745 | "void __addgsword(unsigned long, unsigned short);\n" |
| 21746 | "static __inline__\n" |
| 21747 | "void __faststorefence(void);\n" |
| 21748 | "void __incgsbyte(unsigned long);\n" |
| 21749 | "void __incgsdword(unsigned long);\n" |
| 21750 | "void __incgsqword(unsigned long);\n" |
| 21751 | "void __incgsword(unsigned long);\n" |
| 21752 | "unsigned __int64 __lzcnt64(unsigned __int64);\n" |
| 21753 | "static __inline__\n" |
| 21754 | "void __movsq(unsigned long long *, unsigned long long const *, size_t);\n" |
| 21755 | "static __inline__\n" |
| 21756 | "unsigned char __readgsbyte(unsigned long);\n" |
| 21757 | "static __inline__\n" |
| 21758 | "unsigned long __readgsdword(unsigned long);\n" |
| 21759 | "static __inline__\n" |
| 21760 | "unsigned __int64 __readgsqword(unsigned long);\n" |
| 21761 | "unsigned short __readgsword(unsigned long);\n" |
| 21762 | "unsigned __int64 __shiftleft128(unsigned __int64 _LowPart,\n" |
| 21763 | " unsigned __int64 _HighPart,\n" |
| 21764 | " unsigned char _Shift);\n" |
| 21765 | "unsigned __int64 __shiftright128(unsigned __int64 _LowPart,\n" |
| 21766 | " unsigned __int64 _HighPart,\n" |
| 21767 | " unsigned char _Shift);\n" |
| 21768 | "static __inline__\n" |
| 21769 | "void __stosq(unsigned __int64 *, unsigned __int64, size_t);\n" |
| 21770 | "unsigned char __vmx_on(unsigned __int64 *);\n" |
| 21771 | "unsigned char __vmx_vmclear(unsigned __int64 *);\n" |
| 21772 | "unsigned char __vmx_vmlaunch(void);\n" |
| 21773 | "unsigned char __vmx_vmptrld(unsigned __int64 *);\n" |
| 21774 | "unsigned char __vmx_vmread(size_t, size_t *);\n" |
| 21775 | "unsigned char __vmx_vmresume(void);\n" |
| 21776 | "unsigned char __vmx_vmwrite(size_t, size_t);\n" |
| 21777 | "void __writegsbyte(unsigned long, unsigned char);\n" |
| 21778 | "void __writegsdword(unsigned long, unsigned long);\n" |
| 21779 | "void __writegsqword(unsigned long, unsigned __int64);\n" |
| 21780 | "void __writegsword(unsigned long, unsigned short);\n" |
| 21781 | "unsigned char _bittest64(__int64 const *, __int64);\n" |
| 21782 | "unsigned char _bittestandcomplement64(__int64 *, __int64);\n" |
| 21783 | "unsigned char _bittestandreset64(__int64 *, __int64);\n" |
| 21784 | "unsigned char _bittestandset64(__int64 *, __int64);\n" |
| 21785 | "long _InterlockedAnd_np(long volatile *_Value, long _Mask);\n" |
| 21786 | "short _InterlockedAnd16_np(short volatile *_Value, short _Mask);\n" |
| 21787 | "__int64 _InterlockedAnd64_np(__int64 volatile *_Value, __int64 _Mask);\n" |
| 21788 | "char _InterlockedAnd8_np(char volatile *_Value, char _Mask);\n" |
| 21789 | "unsigned char _interlockedbittestandreset64(__int64 volatile *, __int64);\n" |
| 21790 | "unsigned char _interlockedbittestandset64(__int64 volatile *, __int64);\n" |
| 21791 | "long _InterlockedCompareExchange_np(long volatile *_Destination, long _Exchange,\n" |
| 21792 | " long _Comparand);\n" |
| 21793 | "unsigned char _InterlockedCompareExchange128(__int64 volatile *_Destination,\n" |
| 21794 | " __int64 _ExchangeHigh,\n" |
| 21795 | " __int64 _ExchangeLow,\n" |
| 21796 | " __int64 *_CompareandResult);\n" |
| 21797 | "unsigned char _InterlockedCompareExchange128_np(__int64 volatile *_Destination,\n" |
| 21798 | " __int64 _ExchangeHigh,\n" |
| 21799 | " __int64 _ExchangeLow,\n" |
| 21800 | " __int64 *_ComparandResult);\n" |
| 21801 | "short _InterlockedCompareExchange16_np(short volatile *_Destination,\n" |
| 21802 | " short _Exchange, short _Comparand);\n" |
| 21803 | "__int64 _InterlockedCompareExchange64_np(__int64 volatile *_Destination,\n" |
| 21804 | " __int64 _Exchange, __int64 _Comparand);\n" |
| 21805 | "void *_InterlockedCompareExchangePointer_np(void *volatile *_Destination,\n" |
| 21806 | " void *_Exchange, void *_Comparand);\n" |
| 21807 | "long _InterlockedOr_np(long volatile *_Value, long _Mask);\n" |
| 21808 | "short _InterlockedOr16_np(short volatile *_Value, short _Mask);\n" |
| 21809 | "__int64 _InterlockedOr64_np(__int64 volatile *_Value, __int64 _Mask);\n" |
| 21810 | "char _InterlockedOr8_np(char volatile *_Value, char _Mask);\n" |
| 21811 | "long _InterlockedXor_np(long volatile *_Value, long _Mask);\n" |
| 21812 | "short _InterlockedXor16_np(short volatile *_Value, short _Mask);\n" |
| 21813 | "__int64 _InterlockedXor64_np(__int64 volatile *_Value, __int64 _Mask);\n" |
| 21814 | "char _InterlockedXor8_np(char volatile *_Value, char _Mask);\n" |
| 21815 | "unsigned __int64 _rorx_u64(unsigned __int64, const unsigned int);\n" |
| 21816 | "__int64 _sarx_i64(__int64, unsigned int);\n" |
| 21817 | "unsigned __int64 _shlx_u64(unsigned __int64, unsigned int);\n" |
| 21818 | "unsigned __int64 _shrx_u64(unsigned __int64, unsigned int);\n" |
| 21819 | "static __inline__\n" |
| 21820 | "__int64 __mulh(__int64, __int64);\n" |
| 21821 | "static __inline__\n" |
| 21822 | "unsigned __int64 __umulh(unsigned __int64, unsigned __int64);\n" |
| 21823 | "static __inline__\n" |
| 21824 | "__int64 _mul128(__int64, __int64, __int64*);\n" |
| 21825 | "static __inline__\n" |
| 21826 | "unsigned __int64 _umul128(unsigned __int64,\n" |
| 21827 | " unsigned __int64,\n" |
| 21828 | " unsigned __int64*);\n" |
| 21829 | "\n" |
| 21830 | "#endif /* __x86_64__ */\n" |
| 21831 | "\n" |
| 21832 | "#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)\n" |
| 21833 | "\n" |
| 21834 | "static __inline__\n" |
| 21835 | "unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);\n" |
| 21836 | "static __inline__\n" |
| 21837 | "unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);\n" |
| 21838 | "\n" |
| 21839 | "static __inline__\n" |
| 21840 | "__int64 _InterlockedDecrement64(__int64 volatile *_Addend);\n" |
| 21841 | "static __inline__\n" |
| 21842 | "__int64 _InterlockedExchange64(__int64 volatile *_Target, __int64 _Value);\n" |
| 21843 | "static __inline__\n" |
| 21844 | "__int64 _InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value);\n" |
| 21845 | "static __inline__\n" |
| 21846 | "__int64 _InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value);\n" |
| 21847 | "static __inline__\n" |
| 21848 | "__int64 _InterlockedIncrement64(__int64 volatile *_Addend);\n" |
| 21849 | "static __inline__\n" |
| 21850 | "__int64 _InterlockedOr64(__int64 volatile *_Value, __int64 _Mask);\n" |
| 21851 | "static __inline__\n" |
| 21852 | "__int64 _InterlockedXor64(__int64 volatile *_Value, __int64 _Mask);\n" |
| 21853 | "static __inline__\n" |
| 21854 | "__int64 _InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask);\n" |
| 21855 | "\n" |
| 21856 | "#endif\n" |
| 21857 | "\n" |
| 21858 | "/*----------------------------------------------------------------------------*\\\n" |
| 21859 | "|* Interlocked Exchange Add\n" |
| 21860 | "\\*----------------------------------------------------------------------------*/\n" |
| 21861 | "#if defined(__arm__) || defined(__aarch64__)\n" |
| 21862 | "static __inline__ char __DEFAULT_FN_ATTRS\n" |
| 21863 | "_InterlockedExchangeAdd8_acq(char volatile *_Addend, char _Value) {\n" |
| 21864 | " return __atomic_fetch_add(_Addend, _Value, __ATOMIC_ACQUIRE);\n" |
| 21865 | "}\n" |
| 21866 | "static __inline__ char __DEFAULT_FN_ATTRS\n" |
| 21867 | "_InterlockedExchangeAdd8_nf(char volatile *_Addend, char _Value) {\n" |
| 21868 | " return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELAXED);\n" |
| 21869 | "}\n" |
| 21870 | "static __inline__ char __DEFAULT_FN_ATTRS\n" |
| 21871 | "_InterlockedExchangeAdd8_rel(char volatile *_Addend, char _Value) {\n" |
| 21872 | " return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELAXED);\n" |
| 21873 | "}\n" |
| 21874 | "static __inline__ short __DEFAULT_FN_ATTRS\n" |
| 21875 | "_InterlockedExchangeAdd16_acq(short volatile *_Addend, short _Value) {\n" |
| 21876 | " return __atomic_fetch_add(_Addend, _Value, __ATOMIC_ACQUIRE);\n" |
| 21877 | "}\n" |
| 21878 | "static __inline__ short __DEFAULT_FN_ATTRS\n" |
| 21879 | "_InterlockedExchangeAdd16_nf(short volatile *_Addend, short _Value) {\n" |
| 21880 | " return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELAXED);\n" |
| 21881 | "}\n" |
| 21882 | "static __inline__ short __DEFAULT_FN_ATTRS\n" |
| 21883 | "_InterlockedExchangeAdd16_rel(short volatile *_Addend, short _Value) {\n" |
| 21884 | " return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELEASE);\n" |
| 21885 | "}\n" |
| 21886 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
| 21887 | "_InterlockedExchangeAdd_acq(long volatile *_Addend, long _Value) {\n" |
| 21888 | " return __atomic_fetch_add(_Addend, _Value, __ATOMIC_ACQUIRE);\n" |
| 21889 | "}\n" |
| 21890 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
| 21891 | "_InterlockedExchangeAdd_nf(long volatile *_Addend, long _Value) {\n" |
| 21892 | " return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELAXED);\n" |
| 21893 | "}\n" |
| 21894 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
| 21895 | "_InterlockedExchangeAdd_rel(long volatile *_Addend, long _Value) {\n" |
| 21896 | " return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELEASE);\n" |
| 21897 | "}\n" |
| 21898 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
| 21899 | "_InterlockedExchangeAdd64_acq(__int64 volatile *_Addend, __int64 _Value) {\n" |
| 21900 | " return __atomic_fetch_add(_Addend, _Value, __ATOMIC_ACQUIRE);\n" |
| 21901 | "}\n" |
| 21902 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
| 21903 | "_InterlockedExchangeAdd64_nf(__int64 volatile *_Addend, __int64 _Value) {\n" |
| 21904 | " return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELAXED);\n" |
| 21905 | "}\n" |
| 21906 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
| 21907 | "_InterlockedExchangeAdd64_rel(__int64 volatile *_Addend, __int64 _Value) {\n" |
| 21908 | " return __atomic_fetch_add(_Addend, _Value, __ATOMIC_RELEASE);\n" |
| 21909 | "}\n" |
| 21910 | "#endif\n" |
| 21911 | "/*----------------------------------------------------------------------------*\\\n" |
| 21912 | "|* Interlocked Increment\n" |
| 21913 | "\\*----------------------------------------------------------------------------*/\n" |
| 21914 | "#if defined(__arm__) || defined(__aarch64__)\n" |
| 21915 | "static __inline__ short __DEFAULT_FN_ATTRS\n" |
| 21916 | "_InterlockedIncrement16_acq(short volatile *_Value) {\n" |
| 21917 | " return __atomic_add_fetch(_Value, 1, __ATOMIC_ACQUIRE);\n" |
| 21918 | "}\n" |
| 21919 | "static __inline__ short __DEFAULT_FN_ATTRS\n" |
| 21920 | "_InterlockedIncrement16_nf(short volatile *_Value) {\n" |
| 21921 | " return __atomic_add_fetch(_Value, 1, __ATOMIC_RELAXED);\n" |
| 21922 | "}\n" |
| 21923 | "static __inline__ short __DEFAULT_FN_ATTRS\n" |
| 21924 | "_InterlockedIncrement16_rel(short volatile *_Value) {\n" |
| 21925 | " return __atomic_add_fetch(_Value, 1, __ATOMIC_RELEASE);\n" |
| 21926 | "}\n" |
| 21927 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
| 21928 | "_InterlockedIncrement_acq(long volatile *_Value) {\n" |
| 21929 | " return __atomic_add_fetch(_Value, 1, __ATOMIC_ACQUIRE);\n" |
| 21930 | "}\n" |
| 21931 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
| 21932 | "_InterlockedIncrement_nf(long volatile *_Value) {\n" |
| 21933 | " return __atomic_add_fetch(_Value, 1, __ATOMIC_RELAXED);\n" |
| 21934 | "}\n" |
| 21935 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
| 21936 | "_InterlockedIncrement_rel(long volatile *_Value) {\n" |
| 21937 | " return __atomic_add_fetch(_Value, 1, __ATOMIC_RELEASE);\n" |
| 21938 | "}\n" |
| 21939 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
| 21940 | "_InterlockedIncrement64_acq(__int64 volatile *_Value) {\n" |
| 21941 | " return __atomic_add_fetch(_Value, 1, __ATOMIC_ACQUIRE);\n" |
| 21942 | "}\n" |
| 21943 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
| 21944 | "_InterlockedIncrement64_nf(__int64 volatile *_Value) {\n" |
| 21945 | " return __atomic_add_fetch(_Value, 1, __ATOMIC_RELAXED);\n" |
| 21946 | "}\n" |
| 21947 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
| 21948 | "_InterlockedIncrement64_rel(__int64 volatile *_Value) {\n" |
| 21949 | " return __atomic_add_fetch(_Value, 1, __ATOMIC_RELEASE);\n" |
| 21950 | "}\n" |
| 21951 | "#endif\n" |
| 21952 | "/*----------------------------------------------------------------------------*\\\n" |
| 21953 | "|* Interlocked Decrement\n" |
| 21954 | "\\*----------------------------------------------------------------------------*/\n" |
| 21955 | "#if defined(__arm__) || defined(__aarch64__)\n" |
| 21956 | "static __inline__ short __DEFAULT_FN_ATTRS\n" |
| 21957 | "_InterlockedDecrement16_acq(short volatile *_Value) {\n" |
| 21958 | " return __atomic_sub_fetch(_Value, 1, __ATOMIC_ACQUIRE);\n" |
| 21959 | "}\n" |
| 21960 | "static __inline__ short __DEFAULT_FN_ATTRS\n" |
| 21961 | "_InterlockedDecrement16_nf(short volatile *_Value) {\n" |
| 21962 | " return __atomic_sub_fetch(_Value, 1, __ATOMIC_RELAXED);\n" |
| 21963 | "}\n" |
| 21964 | "static __inline__ short __DEFAULT_FN_ATTRS\n" |
| 21965 | "_InterlockedDecrement16_rel(short volatile *_Value) {\n" |
| 21966 | " return __atomic_sub_fetch(_Value, 1, __ATOMIC_RELEASE);\n" |
| 21967 | "}\n" |
| 21968 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
| 21969 | "_InterlockedDecrement_acq(long volatile *_Value) {\n" |
| 21970 | " return __atomic_sub_fetch(_Value, 1, __ATOMIC_ACQUIRE);\n" |
| 21971 | "}\n" |
| 21972 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
| 21973 | "_InterlockedDecrement_nf(long volatile *_Value) {\n" |
| 21974 | " return __atomic_sub_fetch(_Value, 1, __ATOMIC_RELAXED);\n" |
| 21975 | "}\n" |
| 21976 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
| 21977 | "_InterlockedDecrement_rel(long volatile *_Value) {\n" |
| 21978 | " return __atomic_sub_fetch(_Value, 1, __ATOMIC_RELEASE);\n" |
| 21979 | "}\n" |
| 21980 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
| 21981 | "_InterlockedDecrement64_acq(__int64 volatile *_Value) {\n" |
| 21982 | " return __atomic_sub_fetch(_Value, 1, __ATOMIC_ACQUIRE);\n" |
| 21983 | "}\n" |
| 21984 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
| 21985 | "_InterlockedDecrement64_nf(__int64 volatile *_Value) {\n" |
| 21986 | " return __atomic_sub_fetch(_Value, 1, __ATOMIC_RELAXED);\n" |
| 21987 | "}\n" |
| 21988 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
| 21989 | "_InterlockedDecrement64_rel(__int64 volatile *_Value) {\n" |
| 21990 | " return __atomic_sub_fetch(_Value, 1, __ATOMIC_RELEASE);\n" |
| 21991 | "}\n" |
| 21992 | "#endif\n" |
| 21993 | "/*----------------------------------------------------------------------------*\\\n" |
| 21994 | "|* Interlocked And\n" |
| 21995 | "\\*----------------------------------------------------------------------------*/\n" |
| 21996 | "#if defined(__arm__) || defined(__aarch64__)\n" |
| 21997 | "static __inline__ char __DEFAULT_FN_ATTRS\n" |
| 21998 | "_InterlockedAnd8_acq(char volatile *_Value, char _Mask) {\n" |
| 21999 | " return __atomic_fetch_and(_Value, _Mask, __ATOMIC_ACQUIRE);\n" |
| 22000 | "}\n" |
| 22001 | "static __inline__ char __DEFAULT_FN_ATTRS\n" |
| 22002 | "_InterlockedAnd8_nf(char volatile *_Value, char _Mask) {\n" |
| 22003 | " return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELAXED);\n" |
| 22004 | "}\n" |
| 22005 | "static __inline__ char __DEFAULT_FN_ATTRS\n" |
| 22006 | "_InterlockedAnd8_rel(char volatile *_Value, char _Mask) {\n" |
| 22007 | " return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELEASE);\n" |
| 22008 | "}\n" |
| 22009 | "static __inline__ short __DEFAULT_FN_ATTRS\n" |
| 22010 | "_InterlockedAnd16_acq(short volatile *_Value, short _Mask) {\n" |
| 22011 | " return __atomic_fetch_and(_Value, _Mask, __ATOMIC_ACQUIRE);\n" |
| 22012 | "}\n" |
| 22013 | "static __inline__ short __DEFAULT_FN_ATTRS\n" |
| 22014 | "_InterlockedAnd16_nf(short volatile *_Value, short _Mask) {\n" |
| 22015 | " return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELAXED);\n" |
| 22016 | "}\n" |
| 22017 | "static __inline__ short __DEFAULT_FN_ATTRS\n" |
| 22018 | "_InterlockedAnd16_rel(short volatile *_Value, short _Mask) {\n" |
| 22019 | " return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELEASE);\n" |
| 22020 | "}\n" |
| 22021 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
| 22022 | "_InterlockedAnd_acq(long volatile *_Value, long _Mask) {\n" |
| 22023 | " return __atomic_fetch_and(_Value, _Mask, __ATOMIC_ACQUIRE);\n" |
| 22024 | "}\n" |
| 22025 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
| 22026 | "_InterlockedAnd_nf(long volatile *_Value, long _Mask) {\n" |
| 22027 | " return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELAXED);\n" |
| 22028 | "}\n" |
| 22029 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
| 22030 | "_InterlockedAnd_rel(long volatile *_Value, long _Mask) {\n" |
| 22031 | " return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELEASE);\n" |
| 22032 | "}\n" |
| 22033 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
| 22034 | "_InterlockedAnd64_acq(__int64 volatile *_Value, __int64 _Mask) {\n" |
| 22035 | " return __atomic_fetch_and(_Value, _Mask, __ATOMIC_ACQUIRE);\n" |
| 22036 | "}\n" |
| 22037 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
| 22038 | "_InterlockedAnd64_nf(__int64 volatile *_Value, __int64 _Mask) {\n" |
| 22039 | " return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELAXED);\n" |
| 22040 | "}\n" |
| 22041 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
| 22042 | "_InterlockedAnd64_rel(__int64 volatile *_Value, __int64 _Mask) {\n" |
| 22043 | " return __atomic_fetch_and(_Value, _Mask, __ATOMIC_RELEASE);\n" |
| 22044 | "}\n" |
| 22045 | "#endif\n" |
| 22046 | "/*----------------------------------------------------------------------------*\\\n" |
| 22047 | "|* Bit Counting and Testing\n" |
| 22048 | "\\*----------------------------------------------------------------------------*/\n" |
| 22049 | "#if defined(__arm__) || defined(__aarch64__)\n" |
| 22050 | "unsigned char _interlockedbittestandset_acq(long volatile *_BitBase,\n" |
| 22051 | " long _BitPos);\n" |
| 22052 | "unsigned char _interlockedbittestandset_nf(long volatile *_BitBase,\n" |
| 22053 | " long _BitPos);\n" |
| 22054 | "unsigned char _interlockedbittestandset_rel(long volatile *_BitBase,\n" |
| 22055 | " long _BitPos);\n" |
| 22056 | "unsigned char _interlockedbittestandreset_acq(long volatile *_BitBase,\n" |
| 22057 | " long _BitPos);\n" |
| 22058 | "unsigned char _interlockedbittestandreset_nf(long volatile *_BitBase,\n" |
| 22059 | " long _BitPos);\n" |
| 22060 | "unsigned char _interlockedbittestandreset_rel(long volatile *_BitBase,\n" |
| 22061 | " long _BitPos);\n" |
| 22062 | "#endif\n" |
| 22063 | "/*----------------------------------------------------------------------------*\\\n" |
| 22064 | "|* Interlocked Or\n" |
| 22065 | "\\*----------------------------------------------------------------------------*/\n" |
| 22066 | "#if defined(__arm__) || defined(__aarch64__)\n" |
| 22067 | "static __inline__ char __DEFAULT_FN_ATTRS\n" |
| 22068 | "_InterlockedOr8_acq(char volatile *_Value, char _Mask) {\n" |
| 22069 | " return __atomic_fetch_or(_Value, _Mask, __ATOMIC_ACQUIRE);\n" |
| 22070 | "}\n" |
| 22071 | "static __inline__ char __DEFAULT_FN_ATTRS\n" |
| 22072 | "_InterlockedOr8_nf(char volatile *_Value, char _Mask) {\n" |
| 22073 | " return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELAXED);\n" |
| 22074 | "}\n" |
| 22075 | "static __inline__ char __DEFAULT_FN_ATTRS\n" |
| 22076 | "_InterlockedOr8_rel(char volatile *_Value, char _Mask) {\n" |
| 22077 | " return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELEASE);\n" |
| 22078 | "}\n" |
| 22079 | "static __inline__ short __DEFAULT_FN_ATTRS\n" |
| 22080 | "_InterlockedOr16_acq(short volatile *_Value, short _Mask) {\n" |
| 22081 | " return __atomic_fetch_or(_Value, _Mask, __ATOMIC_ACQUIRE);\n" |
| 22082 | "}\n" |
| 22083 | "static __inline__ short __DEFAULT_FN_ATTRS\n" |
| 22084 | "_InterlockedOr16_nf(short volatile *_Value, short _Mask) {\n" |
| 22085 | " return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELAXED);\n" |
| 22086 | "}\n" |
| 22087 | "static __inline__ short __DEFAULT_FN_ATTRS\n" |
| 22088 | "_InterlockedOr16_rel(short volatile *_Value, short _Mask) {\n" |
| 22089 | " return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELEASE);\n" |
| 22090 | "}\n" |
| 22091 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
| 22092 | "_InterlockedOr_acq(long volatile *_Value, long _Mask) {\n" |
| 22093 | " return __atomic_fetch_or(_Value, _Mask, __ATOMIC_ACQUIRE);\n" |
| 22094 | "}\n" |
| 22095 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
| 22096 | "_InterlockedOr_nf(long volatile *_Value, long _Mask) {\n" |
| 22097 | " return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELAXED);\n" |
| 22098 | "}\n" |
| 22099 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
| 22100 | "_InterlockedOr_rel(long volatile *_Value, long _Mask) {\n" |
| 22101 | " return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELEASE);\n" |
| 22102 | "}\n" |
| 22103 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
| 22104 | "_InterlockedOr64_acq(__int64 volatile *_Value, __int64 _Mask) {\n" |
| 22105 | " return __atomic_fetch_or(_Value, _Mask, __ATOMIC_ACQUIRE);\n" |
| 22106 | "}\n" |
| 22107 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
| 22108 | "_InterlockedOr64_nf(__int64 volatile *_Value, __int64 _Mask) {\n" |
| 22109 | " return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELAXED);\n" |
| 22110 | "}\n" |
| 22111 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
| 22112 | "_InterlockedOr64_rel(__int64 volatile *_Value, __int64 _Mask) {\n" |
| 22113 | " return __atomic_fetch_or(_Value, _Mask, __ATOMIC_RELEASE);\n" |
| 22114 | "}\n" |
| 22115 | "#endif\n" |
| 22116 | "/*----------------------------------------------------------------------------*\\\n" |
| 22117 | "|* Interlocked Xor\n" |
| 22118 | "\\*----------------------------------------------------------------------------*/\n" |
| 22119 | "#if defined(__arm__) || defined(__aarch64__)\n" |
| 22120 | "static __inline__ char __DEFAULT_FN_ATTRS\n" |
| 22121 | "_InterlockedXor8_acq(char volatile *_Value, char _Mask) {\n" |
| 22122 | " return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_ACQUIRE);\n" |
| 22123 | "}\n" |
| 22124 | "static __inline__ char __DEFAULT_FN_ATTRS\n" |
| 22125 | "_InterlockedXor8_nf(char volatile *_Value, char _Mask) {\n" |
| 22126 | " return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELAXED);\n" |
| 22127 | "}\n" |
| 22128 | "static __inline__ char __DEFAULT_FN_ATTRS\n" |
| 22129 | "_InterlockedXor8_rel(char volatile *_Value, char _Mask) {\n" |
| 22130 | " return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELEASE);\n" |
| 22131 | "}\n" |
| 22132 | "static __inline__ short __DEFAULT_FN_ATTRS\n" |
| 22133 | "_InterlockedXor16_acq(short volatile *_Value, short _Mask) {\n" |
| 22134 | " return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_ACQUIRE);\n" |
| 22135 | "}\n" |
| 22136 | "static __inline__ short __DEFAULT_FN_ATTRS\n" |
| 22137 | "_InterlockedXor16_nf(short volatile *_Value, short _Mask) {\n" |
| 22138 | " return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELAXED);\n" |
| 22139 | "}\n" |
| 22140 | "static __inline__ short __DEFAULT_FN_ATTRS\n" |
| 22141 | "_InterlockedXor16_rel(short volatile *_Value, short _Mask) {\n" |
| 22142 | " return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELEASE);\n" |
| 22143 | "}\n" |
| 22144 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
| 22145 | "_InterlockedXor_acq(long volatile *_Value, long _Mask) {\n" |
| 22146 | " return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_ACQUIRE);\n" |
| 22147 | "}\n" |
| 22148 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
| 22149 | "_InterlockedXor_nf(long volatile *_Value, long _Mask) {\n" |
| 22150 | " return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELAXED);\n" |
| 22151 | "}\n" |
| 22152 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
| 22153 | "_InterlockedXor_rel(long volatile *_Value, long _Mask) {\n" |
| 22154 | " return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELEASE);\n" |
| 22155 | "}\n" |
| 22156 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
| 22157 | "_InterlockedXor64_acq(__int64 volatile *_Value, __int64 _Mask) {\n" |
| 22158 | " return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_ACQUIRE);\n" |
| 22159 | "}\n" |
| 22160 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
| 22161 | "_InterlockedXor64_nf(__int64 volatile *_Value, __int64 _Mask) {\n" |
| 22162 | " return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELAXED);\n" |
| 22163 | "}\n" |
| 22164 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
| 22165 | "_InterlockedXor64_rel(__int64 volatile *_Value, __int64 _Mask) {\n" |
| 22166 | " return __atomic_fetch_xor(_Value, _Mask, __ATOMIC_RELEASE);\n" |
| 22167 | "}\n" |
| 22168 | "#endif\n" |
| 22169 | "/*----------------------------------------------------------------------------*\\\n" |
| 22170 | "|* Interlocked Exchange\n" |
| 22171 | "\\*----------------------------------------------------------------------------*/\n" |
| 22172 | "#if defined(__arm__) || defined(__aarch64__)\n" |
| 22173 | "static __inline__ char __DEFAULT_FN_ATTRS\n" |
| 22174 | "_InterlockedExchange8_acq(char volatile *_Target, char _Value) {\n" |
| 22175 | " __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_ACQUIRE);\n" |
| 22176 | " return _Value;\n" |
| 22177 | "}\n" |
| 22178 | "static __inline__ char __DEFAULT_FN_ATTRS\n" |
| 22179 | "_InterlockedExchange8_nf(char volatile *_Target, char _Value) {\n" |
| 22180 | " __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELAXED);\n" |
| 22181 | " return _Value;\n" |
| 22182 | "}\n" |
| 22183 | "static __inline__ char __DEFAULT_FN_ATTRS\n" |
| 22184 | "_InterlockedExchange8_rel(char volatile *_Target, char _Value) {\n" |
| 22185 | " __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELEASE);\n" |
| 22186 | " return _Value;\n" |
| 22187 | "}\n" |
| 22188 | "static __inline__ short __DEFAULT_FN_ATTRS\n" |
| 22189 | "_InterlockedExchange16_acq(short volatile *_Target, short _Value) {\n" |
| 22190 | " __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_ACQUIRE);\n" |
| 22191 | " return _Value;\n" |
| 22192 | "}\n" |
| 22193 | "static __inline__ short __DEFAULT_FN_ATTRS\n" |
| 22194 | "_InterlockedExchange16_nf(short volatile *_Target, short _Value) {\n" |
| 22195 | " __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELAXED);\n" |
| 22196 | " return _Value;\n" |
| 22197 | "}\n" |
| 22198 | "static __inline__ short __DEFAULT_FN_ATTRS\n" |
| 22199 | "_InterlockedExchange16_rel(short volatile *_Target, short _Value) {\n" |
| 22200 | " __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELEASE);\n" |
| 22201 | " return _Value;\n" |
| 22202 | "}\n" |
| 22203 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
| 22204 | "_InterlockedExchange_acq(long volatile *_Target, long _Value) {\n" |
| 22205 | " __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_ACQUIRE);\n" |
| 22206 | " return _Value;\n" |
| 22207 | "}\n" |
| 22208 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
| 22209 | "_InterlockedExchange_nf(long volatile *_Target, long _Value) {\n" |
| 22210 | " __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELAXED);\n" |
| 22211 | " return _Value;\n" |
| 22212 | "}\n" |
| 22213 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
| 22214 | "_InterlockedExchange_rel(long volatile *_Target, long _Value) {\n" |
| 22215 | " __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELEASE);\n" |
| 22216 | " return _Value;\n" |
| 22217 | "}\n" |
| 22218 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
| 22219 | "_InterlockedExchange64_acq(__int64 volatile *_Target, __int64 _Value) {\n" |
| 22220 | " __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_ACQUIRE);\n" |
| 22221 | " return _Value;\n" |
| 22222 | "}\n" |
| 22223 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
| 22224 | "_InterlockedExchange64_nf(__int64 volatile *_Target, __int64 _Value) {\n" |
| 22225 | " __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELAXED);\n" |
| 22226 | " return _Value;\n" |
| 22227 | "}\n" |
| 22228 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
| 22229 | "_InterlockedExchange64_rel(__int64 volatile *_Target, __int64 _Value) {\n" |
| 22230 | " __atomic_exchange(_Target, &_Value, &_Value, __ATOMIC_RELEASE);\n" |
| 22231 | " return _Value;\n" |
| 22232 | "}\n" |
| 22233 | "#endif\n" |
| 22234 | "/*----------------------------------------------------------------------------*\\\n" |
| 22235 | "|* Interlocked Compare Exchange\n" |
| 22236 | "\\*----------------------------------------------------------------------------*/\n" |
| 22237 | "#if defined(__arm__) || defined(__aarch64__)\n" |
| 22238 | "static __inline__ char __DEFAULT_FN_ATTRS\n" |
| 22239 | "_InterlockedCompareExchange8_acq(char volatile *_Destination,\n" |
| 22240 | " char _Exchange, char _Comparand) {\n" |
| 22241 | " __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,\n" |
| 22242 | " __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);\n" |
| 22243 | " return _Comparand;\n" |
| 22244 | "}\n" |
| 22245 | "static __inline__ char __DEFAULT_FN_ATTRS\n" |
| 22246 | "_InterlockedCompareExchange8_nf(char volatile *_Destination,\n" |
| 22247 | " char _Exchange, char _Comparand) {\n" |
| 22248 | " __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,\n" |
| 22249 | " __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);\n" |
| 22250 | " return _Comparand;\n" |
| 22251 | "}\n" |
| 22252 | "static __inline__ char __DEFAULT_FN_ATTRS\n" |
| 22253 | "_InterlockedCompareExchange8_rel(char volatile *_Destination,\n" |
| 22254 | " char _Exchange, char _Comparand) {\n" |
| 22255 | " __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,\n" |
| 22256 | " __ATOMIC_SEQ_CST, __ATOMIC_RELEASE);\n" |
| 22257 | " return _Comparand;\n" |
| 22258 | "}\n" |
| 22259 | "static __inline__ short __DEFAULT_FN_ATTRS\n" |
| 22260 | "_InterlockedCompareExchange16_acq(short volatile *_Destination,\n" |
| 22261 | " short _Exchange, short _Comparand) {\n" |
| 22262 | " __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,\n" |
| 22263 | " __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);\n" |
| 22264 | " return _Comparand;\n" |
| 22265 | "}\n" |
| 22266 | "static __inline__ short __DEFAULT_FN_ATTRS\n" |
| 22267 | "_InterlockedCompareExchange16_nf(short volatile *_Destination,\n" |
| 22268 | " short _Exchange, short _Comparand) {\n" |
| 22269 | " __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,\n" |
| 22270 | " __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);\n" |
| 22271 | " return _Comparand;\n" |
| 22272 | "}\n" |
| 22273 | "static __inline__ short __DEFAULT_FN_ATTRS\n" |
| 22274 | "_InterlockedCompareExchange16_rel(short volatile *_Destination,\n" |
| 22275 | " short _Exchange, short _Comparand) {\n" |
| 22276 | " __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,\n" |
| 22277 | " __ATOMIC_SEQ_CST, __ATOMIC_RELEASE);\n" |
| 22278 | " return _Comparand;\n" |
| 22279 | "}\n" |
| 22280 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
| 22281 | "_InterlockedCompareExchange_acq(long volatile *_Destination,\n" |
| 22282 | " long _Exchange, long _Comparand) {\n" |
| 22283 | " __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,\n" |
| 22284 | " __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);\n" |
| 22285 | " return _Comparand;\n" |
| 22286 | "}\n" |
| 22287 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
| 22288 | "_InterlockedCompareExchange_nf(long volatile *_Destination,\n" |
| 22289 | " long _Exchange, long _Comparand) {\n" |
| 22290 | " __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,\n" |
| 22291 | " __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);\n" |
| 22292 | " return _Comparand;\n" |
| 22293 | "}\n" |
| 22294 | "static __inline__ long __DEFAULT_FN_ATTRS\n" |
| 22295 | "_InterlockedCompareExchange_rel(long volatile *_Destination,\n" |
| 22296 | " long _Exchange, long _Comparand) {\n" |
| 22297 | " __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,\n" |
| 22298 | " __ATOMIC_SEQ_CST, __ATOMIC_RELEASE);\n" |
| 22299 | " return _Comparand;\n" |
| 22300 | "}\n" |
| 22301 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
| 22302 | "_InterlockedCompareExchange64_acq(__int64 volatile *_Destination,\n" |
| 22303 | " __int64 _Exchange, __int64 _Comparand) {\n" |
| 22304 | " __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,\n" |
| 22305 | " __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);\n" |
| 22306 | " return _Comparand;\n" |
| 22307 | "}\n" |
| 22308 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
| 22309 | "_InterlockedCompareExchange64_nf(__int64 volatile *_Destination,\n" |
| 22310 | " __int64 _Exchange, __int64 _Comparand) {\n" |
| 22311 | " __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,\n" |
| 22312 | " __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);\n" |
| 22313 | " return _Comparand;\n" |
| 22314 | "}\n" |
| 22315 | "static __inline__ __int64 __DEFAULT_FN_ATTRS\n" |
| 22316 | "_InterlockedCompareExchange64_rel(__int64 volatile *_Destination,\n" |
| 22317 | " __int64 _Exchange, __int64 _Comparand) {\n" |
| 22318 | " __atomic_compare_exchange(_Destination, &_Comparand, &_Exchange, 0,\n" |
| 22319 | " __ATOMIC_SEQ_CST, __ATOMIC_RELEASE);\n" |
| 22320 | " return _Comparand;\n" |
| 22321 | "}\n" |
| 22322 | "#endif\n" |
| 22323 | "\n" |
| 22324 | "/*----------------------------------------------------------------------------*\\\n" |
| 22325 | "|* movs, stos\n" |
| 22326 | "\\*----------------------------------------------------------------------------*/\n" |
| 22327 | "#if defined(__i386__) || defined(__x86_64__)\n" |
| 22328 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 22329 | "__movsb(unsigned char *__dst, unsigned char const *__src, size_t __n) {\n" |
| 22330 | " __asm__ __volatile__(\"rep movsb\" : \"+D\"(__dst), \"+S\"(__src), \"+c\"(__n)\n" |
| 22331 | " : : \"memory\");\n" |
| 22332 | "}\n" |
| 22333 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 22334 | "__movsd(unsigned long *__dst, unsigned long const *__src, size_t __n) {\n" |
| 22335 | " __asm__ __volatile__(\"rep movsl\" : \"+D\"(__dst), \"+S\"(__src), \"+c\"(__n)\n" |
| 22336 | " : : \"memory\");\n" |
| 22337 | "}\n" |
| 22338 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 22339 | "__movsw(unsigned short *__dst, unsigned short const *__src, size_t __n) {\n" |
| 22340 | " __asm__ __volatile__(\"rep movsw\" : \"+D\"(__dst), \"+S\"(__src), \"+c\"(__n)\n" |
| 22341 | " : : \"memory\");\n" |
| 22342 | "}\n" |
| 22343 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 22344 | "__stosd(unsigned long *__dst, unsigned long __x, size_t __n) {\n" |
| 22345 | " __asm__ __volatile__(\"rep stosl\" : \"+D\"(__dst), \"+c\"(__n) : \"a\"(__x)\n" |
| 22346 | " : \"memory\");\n" |
| 22347 | "}\n" |
| 22348 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 22349 | "__stosw(unsigned short *__dst, unsigned short __x, size_t __n) {\n" |
| 22350 | " __asm__ __volatile__(\"rep stosw\" : \"+D\"(__dst), \"+c\"(__n) : \"a\"(__x)\n" |
| 22351 | " : \"memory\");\n" |
| 22352 | "}\n" |
| 22353 | "#endif\n" |
| 22354 | "#ifdef __x86_64__\n" |
| 22355 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 22356 | "__movsq(unsigned long long *__dst, unsigned long long const *__src, size_t __n) {\n" |
| 22357 | " __asm__ __volatile__(\"rep movsq\" : \"+D\"(__dst), \"+S\"(__src), \"+c\"(__n)\n" |
| 22358 | " : : \"memory\");\n" |
| 22359 | "}\n" |
| 22360 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 22361 | "__stosq(unsigned __int64 *__dst, unsigned __int64 __x, size_t __n) {\n" |
| 22362 | " __asm__ __volatile__(\"rep stosq\" : \"+D\"(__dst), \"+c\"(__n) : \"a\"(__x)\n" |
| 22363 | " : \"memory\");\n" |
| 22364 | "}\n" |
| 22365 | "#endif\n" |
| 22366 | "\n" |
| 22367 | "/*----------------------------------------------------------------------------*\\\n" |
| 22368 | "|* Misc\n" |
| 22369 | "\\*----------------------------------------------------------------------------*/\n" |
| 22370 | "#if defined(__i386__) || defined(__x86_64__)\n" |
| 22371 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 22372 | "__cpuid(int __info[4], int __level) {\n" |
| 22373 | " __asm__ (\"cpuid\" : \"=a\"(__info[0]), \"=b\" (__info[1]), \"=c\"(__info[2]), \"=d\"(__info[3])\n" |
| 22374 | " : \"a\"(__level));\n" |
| 22375 | "}\n" |
| 22376 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 22377 | "__cpuidex(int __info[4], int __level, int __ecx) {\n" |
| 22378 | " __asm__ (\"cpuid\" : \"=a\"(__info[0]), \"=b\" (__info[1]), \"=c\"(__info[2]), \"=d\"(__info[3])\n" |
| 22379 | " : \"a\"(__level), \"c\"(__ecx));\n" |
| 22380 | "}\n" |
| 22381 | "static __inline__ unsigned __int64 __cdecl __DEFAULT_FN_ATTRS\n" |
| 22382 | "_xgetbv(unsigned int __xcr_no) {\n" |
| 22383 | " unsigned int __eax, __edx;\n" |
| 22384 | " __asm__ (\"xgetbv\" : \"=a\" (__eax), \"=d\" (__edx) : \"c\" (__xcr_no));\n" |
| 22385 | " return ((unsigned __int64)__edx << 32) | __eax;\n" |
| 22386 | "}\n" |
| 22387 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 22388 | "__halt(void) {\n" |
| 22389 | " __asm__ volatile (\"hlt\");\n" |
| 22390 | "}\n" |
| 22391 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 22392 | "__nop(void) {\n" |
| 22393 | " __asm__ volatile (\"nop\");\n" |
| 22394 | "}\n" |
| 22395 | "#endif\n" |
| 22396 | "\n" |
| 22397 | "/*----------------------------------------------------------------------------*\\\n" |
| 22398 | "|* Privileged intrinsics\n" |
| 22399 | "\\*----------------------------------------------------------------------------*/\n" |
| 22400 | "#if defined(__i386__) || defined(__x86_64__)\n" |
| 22401 | "static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS\n" |
| 22402 | "__readmsr(unsigned long __register) {\n" |
| 22403 | " // Loads the contents of a 64-bit model specific register (MSR) specified in\n" |
| 22404 | " // the ECX register into registers EDX:EAX. The EDX register is loaded with\n" |
| 22405 | " // the high-order 32 bits of the MSR and the EAX register is loaded with the\n" |
| 22406 | " // low-order 32 bits. If less than 64 bits are implemented in the MSR being\n" |
| 22407 | " // read, the values returned to EDX:EAX in unimplemented bit locations are\n" |
| 22408 | " // undefined.\n" |
| 22409 | " unsigned long __edx;\n" |
| 22410 | " unsigned long __eax;\n" |
| 22411 | " __asm__ (\"rdmsr\" : \"=d\"(__edx), \"=a\"(__eax) : \"c\"(__register));\n" |
| 22412 | " return (((unsigned __int64)__edx) << 32) | (unsigned __int64)__eax;\n" |
| 22413 | "}\n" |
| 22414 | "\n" |
| 22415 | "static __inline__ unsigned long __DEFAULT_FN_ATTRS\n" |
| 22416 | "__readcr3(void) {\n" |
| 22417 | " unsigned long __cr3_val;\n" |
| 22418 | " __asm__ __volatile__ (\"mov %%cr3, %0\" : \"=q\"(__cr3_val) : : \"memory\");\n" |
| 22419 | " return __cr3_val;\n" |
| 22420 | "}\n" |
| 22421 | "\n" |
| 22422 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 22423 | "__writecr3(unsigned int __cr3_val) {\n" |
| 22424 | " __asm__ (\"mov %0, %%cr3\" : : \"q\"(__cr3_val) : \"memory\");\n" |
| 22425 | "}\n" |
| 22426 | "#endif\n" |
| 22427 | "\n" |
| 22428 | "#ifdef __cplusplus\n" |
| 22429 | "}\n" |
| 22430 | "#endif\n" |
| 22431 | "\n" |
| 22432 | "#undef __DEFAULT_FN_ATTRS\n" |
| 22433 | "\n" |
| 22434 | "#endif /* __INTRIN_H */\n" |
| 22435 | "#endif /* _MSC_VER */\n" |
| 22436 | "" } , |
| 22437 | { "/builtins/inttypes.h" , "/*===---- inttypes.h - Standard header for integer printf macros ----------===*\\\n" |
| 22438 | " *\n" |
| 22439 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 22440 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 22441 | " * in the Software without restriction, including without limitation the rights\n" |
| 22442 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 22443 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 22444 | " * furnished to do so, subject to the following conditions:\n" |
| 22445 | " *\n" |
| 22446 | " * The above copyright notice and this permission notice shall be included in\n" |
| 22447 | " * all copies or substantial portions of the Software.\n" |
| 22448 | " *\n" |
| 22449 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 22450 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 22451 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 22452 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 22453 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 22454 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 22455 | " * THE SOFTWARE.\n" |
| 22456 | " *\n" |
| 22457 | "\\*===----------------------------------------------------------------------===*/\n" |
| 22458 | "\n" |
| 22459 | "#ifndef __CLANG_INTTYPES_H\n" |
| 22460 | "#define __CLANG_INTTYPES_H\n" |
| 22461 | "\n" |
| 22462 | "#if defined(_MSC_VER) && _MSC_VER < 1800\n" |
| 22463 | "#error MSVC does not have inttypes.h prior to Visual Studio 2013\n" |
| 22464 | "#endif\n" |
| 22465 | "\n" |
| 22466 | "#include_next <inttypes.h>\n" |
| 22467 | "\n" |
| 22468 | "#if defined(_MSC_VER) && _MSC_VER < 1900\n" |
| 22469 | "/* MSVC headers define int32_t as int, but PRIx32 as \"lx\" instead of \"x\".\n" |
| 22470 | " * This triggers format warnings, so fix it up here. */\n" |
| 22471 | "#undef PRId32\n" |
| 22472 | "#undef PRIdLEAST32\n" |
| 22473 | "#undef PRIdFAST32\n" |
| 22474 | "#undef PRIi32\n" |
| 22475 | "#undef PRIiLEAST32\n" |
| 22476 | "#undef PRIiFAST32\n" |
| 22477 | "#undef PRIo32\n" |
| 22478 | "#undef PRIoLEAST32\n" |
| 22479 | "#undef PRIoFAST32\n" |
| 22480 | "#undef PRIu32\n" |
| 22481 | "#undef PRIuLEAST32\n" |
| 22482 | "#undef PRIuFAST32\n" |
| 22483 | "#undef PRIx32\n" |
| 22484 | "#undef PRIxLEAST32\n" |
| 22485 | "#undef PRIxFAST32\n" |
| 22486 | "#undef PRIX32\n" |
| 22487 | "#undef PRIXLEAST32\n" |
| 22488 | "#undef PRIXFAST32\n" |
| 22489 | "\n" |
| 22490 | "#undef SCNd32\n" |
| 22491 | "#undef SCNdLEAST32\n" |
| 22492 | "#undef SCNdFAST32\n" |
| 22493 | "#undef SCNi32\n" |
| 22494 | "#undef SCNiLEAST32\n" |
| 22495 | "#undef SCNiFAST32\n" |
| 22496 | "#undef SCNo32\n" |
| 22497 | "#undef SCNoLEAST32\n" |
| 22498 | "#undef SCNoFAST32\n" |
| 22499 | "#undef SCNu32\n" |
| 22500 | "#undef SCNuLEAST32\n" |
| 22501 | "#undef SCNuFAST32\n" |
| 22502 | "#undef SCNx32\n" |
| 22503 | "#undef SCNxLEAST32\n" |
| 22504 | "#undef SCNxFAST32\n" |
| 22505 | "\n" |
| 22506 | "#define PRId32 \"d\"\n" |
| 22507 | "#define PRIdLEAST32 \"d\"\n" |
| 22508 | "#define PRIdFAST32 \"d\"\n" |
| 22509 | "#define PRIi32 \"i\"\n" |
| 22510 | "#define PRIiLEAST32 \"i\"\n" |
| 22511 | "#define PRIiFAST32 \"i\"\n" |
| 22512 | "#define PRIo32 \"o\"\n" |
| 22513 | "#define PRIoLEAST32 \"o\"\n" |
| 22514 | "#define PRIoFAST32 \"o\"\n" |
| 22515 | "#define PRIu32 \"u\"\n" |
| 22516 | "#define PRIuLEAST32 \"u\"\n" |
| 22517 | "#define PRIuFAST32 \"u\"\n" |
| 22518 | "#define PRIx32 \"x\"\n" |
| 22519 | "#define PRIxLEAST32 \"x\"\n" |
| 22520 | "#define PRIxFAST32 \"x\"\n" |
| 22521 | "#define PRIX32 \"X\"\n" |
| 22522 | "#define PRIXLEAST32 \"X\"\n" |
| 22523 | "#define PRIXFAST32 \"X\"\n" |
| 22524 | "\n" |
| 22525 | "#define SCNd32 \"d\"\n" |
| 22526 | "#define SCNdLEAST32 \"d\"\n" |
| 22527 | "#define SCNdFAST32 \"d\"\n" |
| 22528 | "#define SCNi32 \"i\"\n" |
| 22529 | "#define SCNiLEAST32 \"i\"\n" |
| 22530 | "#define SCNiFAST32 \"i\"\n" |
| 22531 | "#define SCNo32 \"o\"\n" |
| 22532 | "#define SCNoLEAST32 \"o\"\n" |
| 22533 | "#define SCNoFAST32 \"o\"\n" |
| 22534 | "#define SCNu32 \"u\"\n" |
| 22535 | "#define SCNuLEAST32 \"u\"\n" |
| 22536 | "#define SCNuFAST32 \"u\"\n" |
| 22537 | "#define SCNx32 \"x\"\n" |
| 22538 | "#define SCNxLEAST32 \"x\"\n" |
| 22539 | "#define SCNxFAST32 \"x\"\n" |
| 22540 | "#endif\n" |
| 22541 | "\n" |
| 22542 | "#endif /* __CLANG_INTTYPES_H */\n" |
| 22543 | "" } , |
| 22544 | { "/builtins/invpcidintrin.h" , "/*===------------- invpcidintrin.h - INVPCID intrinsic ---------------------===\n" |
| 22545 | " *\n" |
| 22546 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 22547 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 22548 | " * in the Software without restriction, including without limitation the rights\n" |
| 22549 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 22550 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 22551 | " * furnished to do so, subject to the following conditions:\n" |
| 22552 | " *\n" |
| 22553 | " * The above copyright notice and this permission notice shall be included in\n" |
| 22554 | " * all copies or substantial portions of the Software.\n" |
| 22555 | " *\n" |
| 22556 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 22557 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 22558 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 22559 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 22560 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 22561 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 22562 | " * THE SOFTWARE.\n" |
| 22563 | " *\n" |
| 22564 | " *===-----------------------------------------------------------------------===\n" |
| 22565 | " */\n" |
| 22566 | "\n" |
| 22567 | "#ifndef __IMMINTRIN_H\n" |
| 22568 | "#error \"Never use <invpcidintrin.h> directly; include <immintrin.h> instead.\"\n" |
| 22569 | "#endif\n" |
| 22570 | "\n" |
| 22571 | "#ifndef __INVPCIDINTRIN_H\n" |
| 22572 | "#define __INVPCIDINTRIN_H\n" |
| 22573 | "\n" |
| 22574 | "static __inline__ void\n" |
| 22575 | " __attribute__((__always_inline__, __nodebug__, __target__(\"invpcid\")))\n" |
| 22576 | "_invpcid(unsigned int __type, void *__descriptor) {\n" |
| 22577 | " __builtin_ia32_invpcid(__type, __descriptor);\n" |
| 22578 | "}\n" |
| 22579 | "\n" |
| 22580 | "#endif /* __INVPCIDINTRIN_H */\n" |
| 22581 | "" } , |
| 22582 | { "/builtins/iso646.h" , "/*===---- iso646.h - Standard header for alternate spellings of operators---===\n" |
| 22583 | " *\n" |
| 22584 | " * Copyright (c) 2008 Eli Friedman\n" |
| 22585 | " *\n" |
| 22586 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 22587 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 22588 | " * in the Software without restriction, including without limitation the rights\n" |
| 22589 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 22590 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 22591 | " * furnished to do so, subject to the following conditions:\n" |
| 22592 | " *\n" |
| 22593 | " * The above copyright notice and this permission notice shall be included in\n" |
| 22594 | " * all copies or substantial portions of the Software.\n" |
| 22595 | " *\n" |
| 22596 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 22597 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 22598 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 22599 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 22600 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 22601 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 22602 | " * THE SOFTWARE.\n" |
| 22603 | " *\n" |
| 22604 | " *===-----------------------------------------------------------------------===\n" |
| 22605 | " */\n" |
| 22606 | "\n" |
| 22607 | "#ifndef __ISO646_H\n" |
| 22608 | "#define __ISO646_H\n" |
| 22609 | "\n" |
| 22610 | "#ifndef __cplusplus\n" |
| 22611 | "#define and &&\n" |
| 22612 | "#define and_eq &=\n" |
| 22613 | "#define bitand &\n" |
| 22614 | "#define bitor |\n" |
| 22615 | "#define compl ~\n" |
| 22616 | "#define not !\n" |
| 22617 | "#define not_eq !=\n" |
| 22618 | "#define or ||\n" |
| 22619 | "#define or_eq |=\n" |
| 22620 | "#define xor ^\n" |
| 22621 | "#define xor_eq ^=\n" |
| 22622 | "#endif\n" |
| 22623 | "\n" |
| 22624 | "#endif /* __ISO646_H */\n" |
| 22625 | "" } , |
| 22626 | { "/builtins/limits.h" , "/*===---- limits.h - Standard header for integer sizes --------------------===*\\\n" |
| 22627 | " *\n" |
| 22628 | " * Copyright (c) 2009 Chris Lattner\n" |
| 22629 | " *\n" |
| 22630 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 22631 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 22632 | " * in the Software without restriction, including without limitation the rights\n" |
| 22633 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 22634 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 22635 | " * furnished to do so, subject to the following conditions:\n" |
| 22636 | " *\n" |
| 22637 | " * The above copyright notice and this permission notice shall be included in\n" |
| 22638 | " * all copies or substantial portions of the Software.\n" |
| 22639 | " *\n" |
| 22640 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 22641 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 22642 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 22643 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 22644 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 22645 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 22646 | " * THE SOFTWARE.\n" |
| 22647 | " *\n" |
| 22648 | "\\*===----------------------------------------------------------------------===*/\n" |
| 22649 | "\n" |
| 22650 | "#ifndef __CLANG_LIMITS_H\n" |
| 22651 | "#define __CLANG_LIMITS_H\n" |
| 22652 | "\n" |
| 22653 | "/* The system's limits.h may, in turn, try to #include_next GCC's limits.h.\n" |
| 22654 | " Avert this #include_next madness. */\n" |
| 22655 | "#if defined __GNUC__ && !defined _GCC_LIMITS_H_\n" |
| 22656 | "#define _GCC_LIMITS_H_\n" |
| 22657 | "#endif\n" |
| 22658 | "\n" |
| 22659 | "/* System headers include a number of constants from POSIX in <limits.h>.\n" |
| 22660 | " Include it if we're hosted. */\n" |
| 22661 | "#if __STDC_HOSTED__ && __has_include_next(<limits.h>)\n" |
| 22662 | "#include_next <limits.h>\n" |
| 22663 | "#endif\n" |
| 22664 | "\n" |
| 22665 | "/* Many system headers try to \"help us out\" by defining these. No really, we\n" |
| 22666 | " know how big each datatype is. */\n" |
| 22667 | "#undef SCHAR_MIN\n" |
| 22668 | "#undef SCHAR_MAX\n" |
| 22669 | "#undef UCHAR_MAX\n" |
| 22670 | "#undef SHRT_MIN\n" |
| 22671 | "#undef SHRT_MAX\n" |
| 22672 | "#undef USHRT_MAX\n" |
| 22673 | "#undef INT_MIN\n" |
| 22674 | "#undef INT_MAX\n" |
| 22675 | "#undef UINT_MAX\n" |
| 22676 | "#undef LONG_MIN\n" |
| 22677 | "#undef LONG_MAX\n" |
| 22678 | "#undef ULONG_MAX\n" |
| 22679 | "\n" |
| 22680 | "#undef CHAR_BIT\n" |
| 22681 | "#undef CHAR_MIN\n" |
| 22682 | "#undef CHAR_MAX\n" |
| 22683 | "\n" |
| 22684 | "/* C90/99 5.2.4.2.1 */\n" |
| 22685 | "#define SCHAR_MAX __SCHAR_MAX__\n" |
| 22686 | "#define SHRT_MAX __SHRT_MAX__\n" |
| 22687 | "#define INT_MAX __INT_MAX__\n" |
| 22688 | "#define LONG_MAX __LONG_MAX__\n" |
| 22689 | "\n" |
| 22690 | "#define SCHAR_MIN (-__SCHAR_MAX__-1)\n" |
| 22691 | "#define SHRT_MIN (-__SHRT_MAX__ -1)\n" |
| 22692 | "#define INT_MIN (-__INT_MAX__ -1)\n" |
| 22693 | "#define LONG_MIN (-__LONG_MAX__ -1L)\n" |
| 22694 | "\n" |
| 22695 | "#define UCHAR_MAX (__SCHAR_MAX__*2 +1)\n" |
| 22696 | "#define USHRT_MAX (__SHRT_MAX__ *2 +1)\n" |
| 22697 | "#define UINT_MAX (__INT_MAX__ *2U +1U)\n" |
| 22698 | "#define ULONG_MAX (__LONG_MAX__ *2UL+1UL)\n" |
| 22699 | "\n" |
| 22700 | "#ifndef MB_LEN_MAX\n" |
| 22701 | "#define MB_LEN_MAX 1\n" |
| 22702 | "#endif\n" |
| 22703 | "\n" |
| 22704 | "#define CHAR_BIT __CHAR_BIT__\n" |
| 22705 | "\n" |
| 22706 | "#ifdef __CHAR_UNSIGNED__ /* -funsigned-char */\n" |
| 22707 | "#define CHAR_MIN 0\n" |
| 22708 | "#define CHAR_MAX UCHAR_MAX\n" |
| 22709 | "#else\n" |
| 22710 | "#define CHAR_MIN SCHAR_MIN\n" |
| 22711 | "#define CHAR_MAX __SCHAR_MAX__\n" |
| 22712 | "#endif\n" |
| 22713 | "\n" |
| 22714 | "/* C99 5.2.4.2.1: Added long long.\n" |
| 22715 | " C++11 18.3.3.2: same contents as the Standard C Library header <limits.h>.\n" |
| 22716 | " */\n" |
| 22717 | "#if __STDC_VERSION__ >= 199901L || __cplusplus >= 201103L\n" |
| 22718 | "\n" |
| 22719 | "#undef LLONG_MIN\n" |
| 22720 | "#undef LLONG_MAX\n" |
| 22721 | "#undef ULLONG_MAX\n" |
| 22722 | "\n" |
| 22723 | "#define LLONG_MAX __LONG_LONG_MAX__\n" |
| 22724 | "#define LLONG_MIN (-__LONG_LONG_MAX__-1LL)\n" |
| 22725 | "#define ULLONG_MAX (__LONG_LONG_MAX__*2ULL+1ULL)\n" |
| 22726 | "#endif\n" |
| 22727 | "\n" |
| 22728 | "/* LONG_LONG_MIN/LONG_LONG_MAX/ULONG_LONG_MAX are a GNU extension. It's too bad\n" |
| 22729 | " that we don't have something like #pragma poison that could be used to\n" |
| 22730 | " deprecate a macro - the code should just use LLONG_MAX and friends.\n" |
| 22731 | " */\n" |
| 22732 | "#if defined(__GNU_LIBRARY__) ? defined(__USE_GNU) : !defined(__STRICT_ANSI__)\n" |
| 22733 | "\n" |
| 22734 | "#undef LONG_LONG_MIN\n" |
| 22735 | "#undef LONG_LONG_MAX\n" |
| 22736 | "#undef ULONG_LONG_MAX\n" |
| 22737 | "\n" |
| 22738 | "#define LONG_LONG_MAX __LONG_LONG_MAX__\n" |
| 22739 | "#define LONG_LONG_MIN (-__LONG_LONG_MAX__-1LL)\n" |
| 22740 | "#define ULONG_LONG_MAX (__LONG_LONG_MAX__*2ULL+1ULL)\n" |
| 22741 | "#endif\n" |
| 22742 | "\n" |
| 22743 | "#endif /* __CLANG_LIMITS_H */\n" |
| 22744 | "" } , |
| 22745 | { "/builtins/lwpintrin.h" , "/*===---- lwpintrin.h - LWP intrinsics -------------------------------------===\n" |
| 22746 | " *\n" |
| 22747 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 22748 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 22749 | " * in the Software without restriction, including without limitation the rights\n" |
| 22750 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 22751 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 22752 | " * furnished to do so, subject to the following conditions:\n" |
| 22753 | " *\n" |
| 22754 | " * The above copyright notice and this permission notice shall be included in\n" |
| 22755 | " * all copies or substantial portions of the Software.\n" |
| 22756 | " *\n" |
| 22757 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 22758 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 22759 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 22760 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 22761 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 22762 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 22763 | " * THE SOFTWARE.\n" |
| 22764 | " *\n" |
| 22765 | " *===-----------------------------------------------------------------------===\n" |
| 22766 | " */\n" |
| 22767 | "\n" |
| 22768 | "#ifndef __X86INTRIN_H\n" |
| 22769 | "#error \"Never use <lwpintrin.h> directly; include <x86intrin.h> instead.\"\n" |
| 22770 | "#endif\n" |
| 22771 | "\n" |
| 22772 | "#ifndef __LWPINTRIN_H\n" |
| 22773 | "#define __LWPINTRIN_H\n" |
| 22774 | "\n" |
| 22775 | "/* Define the default attributes for the functions in this file. */\n" |
| 22776 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"lwp\")))\n" |
| 22777 | "\n" |
| 22778 | "/// Parses the LWPCB at the specified address and enables\n" |
| 22779 | "/// profiling if valid.\n" |
| 22780 | "///\n" |
| 22781 | "/// \\headerfile <x86intrin.h>\n" |
| 22782 | "///\n" |
| 22783 | "/// This intrinsic corresponds to the <c> LLWPCB </c> instruction.\n" |
| 22784 | "///\n" |
| 22785 | "/// \\param __addr\n" |
| 22786 | "/// Address to the new Lightweight Profiling Control Block (LWPCB). If the\n" |
| 22787 | "/// LWPCB is valid, writes the address into the LWP_CBADDR MSR and enables\n" |
| 22788 | "/// Lightweight Profiling.\n" |
| 22789 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 22790 | "__llwpcb (void *__addr)\n" |
| 22791 | "{\n" |
| 22792 | " __builtin_ia32_llwpcb(__addr);\n" |
| 22793 | "}\n" |
| 22794 | "\n" |
| 22795 | "/// Flushes the LWP state to memory and returns the address of the LWPCB.\n" |
| 22796 | "///\n" |
| 22797 | "/// \\headerfile <x86intrin.h>\n" |
| 22798 | "///\n" |
| 22799 | "/// This intrinsic corresponds to the <c> SLWPCB </c> instruction.\n" |
| 22800 | "///\n" |
| 22801 | "/// \\return\n" |
| 22802 | "/// Address to the current Lightweight Profiling Control Block (LWPCB).\n" |
| 22803 | "/// If LWP is not currently enabled, returns NULL.\n" |
| 22804 | "static __inline__ void* __DEFAULT_FN_ATTRS\n" |
| 22805 | "__slwpcb (void)\n" |
| 22806 | "{\n" |
| 22807 | " return __builtin_ia32_slwpcb();\n" |
| 22808 | "}\n" |
| 22809 | "\n" |
| 22810 | "/// Inserts programmed event record into the LWP event ring buffer\n" |
| 22811 | "/// and advances the ring buffer pointer.\n" |
| 22812 | "///\n" |
| 22813 | "/// \\headerfile <x86intrin.h>\n" |
| 22814 | "///\n" |
| 22815 | "/// This intrinsic corresponds to the <c> LWPINS </c> instruction.\n" |
| 22816 | "///\n" |
| 22817 | "/// \\param DATA2\n" |
| 22818 | "/// A 32-bit value is zero-extended and inserted into the 64-bit Data2 field.\n" |
| 22819 | "/// \\param DATA1\n" |
| 22820 | "/// A 32-bit value is inserted into the 32-bit Data1 field.\n" |
| 22821 | "/// \\param FLAGS\n" |
| 22822 | "/// A 32-bit immediate value is inserted into the 32-bit Flags field.\n" |
| 22823 | "/// \\returns If the ring buffer is full and LWP is running in Synchronized Mode,\n" |
| 22824 | "/// the event record overwrites the last record in the buffer, the MissedEvents\n" |
| 22825 | "/// counter in the LWPCB is incremented, the head pointer is not advanced, and\n" |
| 22826 | "/// 1 is returned. Otherwise 0 is returned.\n" |
| 22827 | "#define __lwpins32(DATA2, DATA1, FLAGS) \\\n" |
| 22828 | " (__builtin_ia32_lwpins32((unsigned int) (DATA2), (unsigned int) (DATA1), \\\n" |
| 22829 | " (unsigned int) (FLAGS)))\n" |
| 22830 | "\n" |
| 22831 | "/// Decrements the LWP programmed value sample event counter. If the result is\n" |
| 22832 | "/// negative, inserts an event record into the LWP event ring buffer in memory\n" |
| 22833 | "/// and advances the ring buffer pointer.\n" |
| 22834 | "///\n" |
| 22835 | "/// \\headerfile <x86intrin.h>\n" |
| 22836 | "///\n" |
| 22837 | "/// This intrinsic corresponds to the <c> LWPVAL </c> instruction.\n" |
| 22838 | "///\n" |
| 22839 | "/// \\param DATA2\n" |
| 22840 | "/// A 32-bit value is zero-extended and inserted into the 64-bit Data2 field.\n" |
| 22841 | "/// \\param DATA1\n" |
| 22842 | "/// A 32-bit value is inserted into the 32-bit Data1 field.\n" |
| 22843 | "/// \\param FLAGS\n" |
| 22844 | "/// A 32-bit immediate value is inserted into the 32-bit Flags field.\n" |
| 22845 | "#define __lwpval32(DATA2, DATA1, FLAGS) \\\n" |
| 22846 | " (__builtin_ia32_lwpval32((unsigned int) (DATA2), (unsigned int) (DATA1), \\\n" |
| 22847 | " (unsigned int) (FLAGS)))\n" |
| 22848 | "\n" |
| 22849 | "#ifdef __x86_64__\n" |
| 22850 | "\n" |
| 22851 | "/// Inserts programmed event record into the LWP event ring buffer\n" |
| 22852 | "/// and advances the ring buffer pointer.\n" |
| 22853 | "///\n" |
| 22854 | "/// \\headerfile <x86intrin.h>\n" |
| 22855 | "///\n" |
| 22856 | "/// This intrinsic corresponds to the <c> LWPINS </c> instruction.\n" |
| 22857 | "///\n" |
| 22858 | "/// \\param DATA2\n" |
| 22859 | "/// A 64-bit value is inserted into the 64-bit Data2 field.\n" |
| 22860 | "/// \\param DATA1\n" |
| 22861 | "/// A 32-bit value is inserted into the 32-bit Data1 field.\n" |
| 22862 | "/// \\param FLAGS\n" |
| 22863 | "/// A 32-bit immediate value is inserted into the 32-bit Flags field.\n" |
| 22864 | "/// \\returns If the ring buffer is full and LWP is running in Synchronized Mode,\n" |
| 22865 | "/// the event record overwrites the last record in the buffer, the MissedEvents\n" |
| 22866 | "/// counter in the LWPCB is incremented, the head pointer is not advanced, and\n" |
| 22867 | "/// 1 is returned. Otherwise 0 is returned.\n" |
| 22868 | "#define __lwpins64(DATA2, DATA1, FLAGS) \\\n" |
| 22869 | " (__builtin_ia32_lwpins64((unsigned long long) (DATA2), (unsigned int) (DATA1), \\\n" |
| 22870 | " (unsigned int) (FLAGS)))\n" |
| 22871 | "\n" |
| 22872 | "/// Decrements the LWP programmed value sample event counter. If the result is\n" |
| 22873 | "/// negative, inserts an event record into the LWP event ring buffer in memory\n" |
| 22874 | "/// and advances the ring buffer pointer.\n" |
| 22875 | "///\n" |
| 22876 | "/// \\headerfile <x86intrin.h>\n" |
| 22877 | "///\n" |
| 22878 | "/// This intrinsic corresponds to the <c> LWPVAL </c> instruction.\n" |
| 22879 | "///\n" |
| 22880 | "/// \\param DATA2\n" |
| 22881 | "/// A 64-bit value is and inserted into the 64-bit Data2 field.\n" |
| 22882 | "/// \\param DATA1\n" |
| 22883 | "/// A 32-bit value is inserted into the 32-bit Data1 field.\n" |
| 22884 | "/// \\param FLAGS\n" |
| 22885 | "/// A 32-bit immediate value is inserted into the 32-bit Flags field.\n" |
| 22886 | "#define __lwpval64(DATA2, DATA1, FLAGS) \\\n" |
| 22887 | " (__builtin_ia32_lwpval64((unsigned long long) (DATA2), (unsigned int) (DATA1), \\\n" |
| 22888 | " (unsigned int) (FLAGS)))\n" |
| 22889 | "\n" |
| 22890 | "#endif\n" |
| 22891 | "\n" |
| 22892 | "#undef __DEFAULT_FN_ATTRS\n" |
| 22893 | "\n" |
| 22894 | "#endif /* __LWPINTRIN_H */\n" |
| 22895 | "" } , |
| 22896 | { "/builtins/lzcntintrin.h" , "/*===---- lzcntintrin.h - LZCNT intrinsics ---------------------------------===\n" |
| 22897 | " *\n" |
| 22898 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 22899 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 22900 | " * in the Software without restriction, including without limitation the rights\n" |
| 22901 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 22902 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 22903 | " * furnished to do so, subject to the following conditions:\n" |
| 22904 | " *\n" |
| 22905 | " * The above copyright notice and this permission notice shall be included in\n" |
| 22906 | " * all copies or substantial portions of the Software.\n" |
| 22907 | " *\n" |
| 22908 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 22909 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 22910 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 22911 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 22912 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 22913 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 22914 | " * THE SOFTWARE.\n" |
| 22915 | " *\n" |
| 22916 | " *===-----------------------------------------------------------------------===\n" |
| 22917 | " */\n" |
| 22918 | "\n" |
| 22919 | "#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n" |
| 22920 | "#error \"Never use <lzcntintrin.h> directly; include <x86intrin.h> instead.\"\n" |
| 22921 | "#endif\n" |
| 22922 | "\n" |
| 22923 | "#ifndef __LZCNTINTRIN_H\n" |
| 22924 | "#define __LZCNTINTRIN_H\n" |
| 22925 | "\n" |
| 22926 | "/* Define the default attributes for the functions in this file. */\n" |
| 22927 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"lzcnt\")))\n" |
| 22928 | "\n" |
| 22929 | "/// Counts the number of leading zero bits in the operand.\n" |
| 22930 | "///\n" |
| 22931 | "/// \\headerfile <x86intrin.h>\n" |
| 22932 | "///\n" |
| 22933 | "/// This intrinsic corresponds to the \\c LZCNT instruction.\n" |
| 22934 | "///\n" |
| 22935 | "/// \\param __X\n" |
| 22936 | "/// An unsigned 16-bit integer whose leading zeros are to be counted.\n" |
| 22937 | "/// \\returns An unsigned 16-bit integer containing the number of leading zero\n" |
| 22938 | "/// bits in the operand.\n" |
| 22939 | "static __inline__ unsigned short __DEFAULT_FN_ATTRS\n" |
| 22940 | "__lzcnt16(unsigned short __X)\n" |
| 22941 | "{\n" |
| 22942 | " return __X ? __builtin_clzs(__X) : 16;\n" |
| 22943 | "}\n" |
| 22944 | "\n" |
| 22945 | "/// Counts the number of leading zero bits in the operand.\n" |
| 22946 | "///\n" |
| 22947 | "/// \\headerfile <x86intrin.h>\n" |
| 22948 | "///\n" |
| 22949 | "/// This intrinsic corresponds to the \\c LZCNT instruction.\n" |
| 22950 | "///\n" |
| 22951 | "/// \\param __X\n" |
| 22952 | "/// An unsigned 32-bit integer whose leading zeros are to be counted.\n" |
| 22953 | "/// \\returns An unsigned 32-bit integer containing the number of leading zero\n" |
| 22954 | "/// bits in the operand.\n" |
| 22955 | "/// \\see _lzcnt_u32\n" |
| 22956 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
| 22957 | "__lzcnt32(unsigned int __X)\n" |
| 22958 | "{\n" |
| 22959 | " return __X ? __builtin_clz(__X) : 32;\n" |
| 22960 | "}\n" |
| 22961 | "\n" |
| 22962 | "/// Counts the number of leading zero bits in the operand.\n" |
| 22963 | "///\n" |
| 22964 | "/// \\headerfile <x86intrin.h>\n" |
| 22965 | "///\n" |
| 22966 | "/// This intrinsic corresponds to the \\c LZCNT instruction.\n" |
| 22967 | "///\n" |
| 22968 | "/// \\param __X\n" |
| 22969 | "/// An unsigned 32-bit integer whose leading zeros are to be counted.\n" |
| 22970 | "/// \\returns An unsigned 32-bit integer containing the number of leading zero\n" |
| 22971 | "/// bits in the operand.\n" |
| 22972 | "/// \\see __lzcnt32\n" |
| 22973 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
| 22974 | "_lzcnt_u32(unsigned int __X)\n" |
| 22975 | "{\n" |
| 22976 | " return __X ? __builtin_clz(__X) : 32;\n" |
| 22977 | "}\n" |
| 22978 | "\n" |
| 22979 | "#ifdef __x86_64__\n" |
| 22980 | "/// Counts the number of leading zero bits in the operand.\n" |
| 22981 | "///\n" |
| 22982 | "/// \\headerfile <x86intrin.h>\n" |
| 22983 | "///\n" |
| 22984 | "/// This intrinsic corresponds to the \\c LZCNT instruction.\n" |
| 22985 | "///\n" |
| 22986 | "/// \\param __X\n" |
| 22987 | "/// An unsigned 64-bit integer whose leading zeros are to be counted.\n" |
| 22988 | "/// \\returns An unsigned 64-bit integer containing the number of leading zero\n" |
| 22989 | "/// bits in the operand.\n" |
| 22990 | "/// \\see _lzcnt_u64\n" |
| 22991 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
| 22992 | "__lzcnt64(unsigned long long __X)\n" |
| 22993 | "{\n" |
| 22994 | " return __X ? __builtin_clzll(__X) : 64;\n" |
| 22995 | "}\n" |
| 22996 | "\n" |
| 22997 | "/// Counts the number of leading zero bits in the operand.\n" |
| 22998 | "///\n" |
| 22999 | "/// \\headerfile <x86intrin.h>\n" |
| 23000 | "///\n" |
| 23001 | "/// This intrinsic corresponds to the \\c LZCNT instruction.\n" |
| 23002 | "///\n" |
| 23003 | "/// \\param __X\n" |
| 23004 | "/// An unsigned 64-bit integer whose leading zeros are to be counted.\n" |
| 23005 | "/// \\returns An unsigned 64-bit integer containing the number of leading zero\n" |
| 23006 | "/// bits in the operand.\n" |
| 23007 | "/// \\see __lzcnt64\n" |
| 23008 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
| 23009 | "_lzcnt_u64(unsigned long long __X)\n" |
| 23010 | "{\n" |
| 23011 | " return __X ? __builtin_clzll(__X) : 64;\n" |
| 23012 | "}\n" |
| 23013 | "#endif\n" |
| 23014 | "\n" |
| 23015 | "#undef __DEFAULT_FN_ATTRS\n" |
| 23016 | "\n" |
| 23017 | "#endif /* __LZCNTINTRIN_H */\n" |
| 23018 | "" } , |
| 23019 | { "/builtins/mm3dnow.h" , "/*===---- mm3dnow.h - 3DNow! intrinsics ------------------------------------===\n" |
| 23020 | " *\n" |
| 23021 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 23022 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 23023 | " * in the Software without restriction, including without limitation the rights\n" |
| 23024 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 23025 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 23026 | " * furnished to do so, subject to the following conditions:\n" |
| 23027 | " *\n" |
| 23028 | " * The above copyright notice and this permission notice shall be included in\n" |
| 23029 | " * all copies or substantial portions of the Software.\n" |
| 23030 | " *\n" |
| 23031 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 23032 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 23033 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 23034 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 23035 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 23036 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 23037 | " * THE SOFTWARE.\n" |
| 23038 | " *\n" |
| 23039 | " *===-----------------------------------------------------------------------===\n" |
| 23040 | " */\n" |
| 23041 | "\n" |
| 23042 | "#ifndef _MM3DNOW_H_INCLUDED\n" |
| 23043 | "#define _MM3DNOW_H_INCLUDED\n" |
| 23044 | "\n" |
| 23045 | "#include <mmintrin.h>\n" |
| 23046 | "#include <prfchwintrin.h>\n" |
| 23047 | "\n" |
| 23048 | "typedef float __v2sf __attribute__((__vector_size__(8)));\n" |
| 23049 | "\n" |
| 23050 | "/* Define the default attributes for the functions in this file. */\n" |
| 23051 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"3dnow\"), __min_vector_width__(64)))\n" |
| 23052 | "\n" |
| 23053 | "static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"3dnow\")))\n" |
| 23054 | "_m_femms(void) {\n" |
| 23055 | " __builtin_ia32_femms();\n" |
| 23056 | "}\n" |
| 23057 | "\n" |
| 23058 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23059 | "_m_pavgusb(__m64 __m1, __m64 __m2) {\n" |
| 23060 | " return (__m64)__builtin_ia32_pavgusb((__v8qi)__m1, (__v8qi)__m2);\n" |
| 23061 | "}\n" |
| 23062 | "\n" |
| 23063 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23064 | "_m_pf2id(__m64 __m) {\n" |
| 23065 | " return (__m64)__builtin_ia32_pf2id((__v2sf)__m);\n" |
| 23066 | "}\n" |
| 23067 | "\n" |
| 23068 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23069 | "_m_pfacc(__m64 __m1, __m64 __m2) {\n" |
| 23070 | " return (__m64)__builtin_ia32_pfacc((__v2sf)__m1, (__v2sf)__m2);\n" |
| 23071 | "}\n" |
| 23072 | "\n" |
| 23073 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23074 | "_m_pfadd(__m64 __m1, __m64 __m2) {\n" |
| 23075 | " return (__m64)__builtin_ia32_pfadd((__v2sf)__m1, (__v2sf)__m2);\n" |
| 23076 | "}\n" |
| 23077 | "\n" |
| 23078 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23079 | "_m_pfcmpeq(__m64 __m1, __m64 __m2) {\n" |
| 23080 | " return (__m64)__builtin_ia32_pfcmpeq((__v2sf)__m1, (__v2sf)__m2);\n" |
| 23081 | "}\n" |
| 23082 | "\n" |
| 23083 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23084 | "_m_pfcmpge(__m64 __m1, __m64 __m2) {\n" |
| 23085 | " return (__m64)__builtin_ia32_pfcmpge((__v2sf)__m1, (__v2sf)__m2);\n" |
| 23086 | "}\n" |
| 23087 | "\n" |
| 23088 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23089 | "_m_pfcmpgt(__m64 __m1, __m64 __m2) {\n" |
| 23090 | " return (__m64)__builtin_ia32_pfcmpgt((__v2sf)__m1, (__v2sf)__m2);\n" |
| 23091 | "}\n" |
| 23092 | "\n" |
| 23093 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23094 | "_m_pfmax(__m64 __m1, __m64 __m2) {\n" |
| 23095 | " return (__m64)__builtin_ia32_pfmax((__v2sf)__m1, (__v2sf)__m2);\n" |
| 23096 | "}\n" |
| 23097 | "\n" |
| 23098 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23099 | "_m_pfmin(__m64 __m1, __m64 __m2) {\n" |
| 23100 | " return (__m64)__builtin_ia32_pfmin((__v2sf)__m1, (__v2sf)__m2);\n" |
| 23101 | "}\n" |
| 23102 | "\n" |
| 23103 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23104 | "_m_pfmul(__m64 __m1, __m64 __m2) {\n" |
| 23105 | " return (__m64)__builtin_ia32_pfmul((__v2sf)__m1, (__v2sf)__m2);\n" |
| 23106 | "}\n" |
| 23107 | "\n" |
| 23108 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23109 | "_m_pfrcp(__m64 __m) {\n" |
| 23110 | " return (__m64)__builtin_ia32_pfrcp((__v2sf)__m);\n" |
| 23111 | "}\n" |
| 23112 | "\n" |
| 23113 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23114 | "_m_pfrcpit1(__m64 __m1, __m64 __m2) {\n" |
| 23115 | " return (__m64)__builtin_ia32_pfrcpit1((__v2sf)__m1, (__v2sf)__m2);\n" |
| 23116 | "}\n" |
| 23117 | "\n" |
| 23118 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23119 | "_m_pfrcpit2(__m64 __m1, __m64 __m2) {\n" |
| 23120 | " return (__m64)__builtin_ia32_pfrcpit2((__v2sf)__m1, (__v2sf)__m2);\n" |
| 23121 | "}\n" |
| 23122 | "\n" |
| 23123 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23124 | "_m_pfrsqrt(__m64 __m) {\n" |
| 23125 | " return (__m64)__builtin_ia32_pfrsqrt((__v2sf)__m);\n" |
| 23126 | "}\n" |
| 23127 | "\n" |
| 23128 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23129 | "_m_pfrsqrtit1(__m64 __m1, __m64 __m2) {\n" |
| 23130 | " return (__m64)__builtin_ia32_pfrsqit1((__v2sf)__m1, (__v2sf)__m2);\n" |
| 23131 | "}\n" |
| 23132 | "\n" |
| 23133 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23134 | "_m_pfsub(__m64 __m1, __m64 __m2) {\n" |
| 23135 | " return (__m64)__builtin_ia32_pfsub((__v2sf)__m1, (__v2sf)__m2);\n" |
| 23136 | "}\n" |
| 23137 | "\n" |
| 23138 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23139 | "_m_pfsubr(__m64 __m1, __m64 __m2) {\n" |
| 23140 | " return (__m64)__builtin_ia32_pfsubr((__v2sf)__m1, (__v2sf)__m2);\n" |
| 23141 | "}\n" |
| 23142 | "\n" |
| 23143 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23144 | "_m_pi2fd(__m64 __m) {\n" |
| 23145 | " return (__m64)__builtin_ia32_pi2fd((__v2si)__m);\n" |
| 23146 | "}\n" |
| 23147 | "\n" |
| 23148 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23149 | "_m_pmulhrw(__m64 __m1, __m64 __m2) {\n" |
| 23150 | " return (__m64)__builtin_ia32_pmulhrw((__v4hi)__m1, (__v4hi)__m2);\n" |
| 23151 | "}\n" |
| 23152 | "\n" |
| 23153 | "/* Handle the 3dnowa instructions here. */\n" |
| 23154 | "#undef __DEFAULT_FN_ATTRS\n" |
| 23155 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"3dnowa\"), __min_vector_width__(64)))\n" |
| 23156 | "\n" |
| 23157 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23158 | "_m_pf2iw(__m64 __m) {\n" |
| 23159 | " return (__m64)__builtin_ia32_pf2iw((__v2sf)__m);\n" |
| 23160 | "}\n" |
| 23161 | "\n" |
| 23162 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23163 | "_m_pfnacc(__m64 __m1, __m64 __m2) {\n" |
| 23164 | " return (__m64)__builtin_ia32_pfnacc((__v2sf)__m1, (__v2sf)__m2);\n" |
| 23165 | "}\n" |
| 23166 | "\n" |
| 23167 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23168 | "_m_pfpnacc(__m64 __m1, __m64 __m2) {\n" |
| 23169 | " return (__m64)__builtin_ia32_pfpnacc((__v2sf)__m1, (__v2sf)__m2);\n" |
| 23170 | "}\n" |
| 23171 | "\n" |
| 23172 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23173 | "_m_pi2fw(__m64 __m) {\n" |
| 23174 | " return (__m64)__builtin_ia32_pi2fw((__v2si)__m);\n" |
| 23175 | "}\n" |
| 23176 | "\n" |
| 23177 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23178 | "_m_pswapdsf(__m64 __m) {\n" |
| 23179 | " return (__m64)__builtin_ia32_pswapdsf((__v2sf)__m);\n" |
| 23180 | "}\n" |
| 23181 | "\n" |
| 23182 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23183 | "_m_pswapdsi(__m64 __m) {\n" |
| 23184 | " return (__m64)__builtin_ia32_pswapdsi((__v2si)__m);\n" |
| 23185 | "}\n" |
| 23186 | "\n" |
| 23187 | "#undef __DEFAULT_FN_ATTRS\n" |
| 23188 | "\n" |
| 23189 | "#endif\n" |
| 23190 | "" } , |
| 23191 | { "/builtins/mm_malloc.h" , "/*===---- mm_malloc.h - Allocating and Freeing Aligned Memory Blocks -------===\n" |
| 23192 | " *\n" |
| 23193 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 23194 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 23195 | " * in the Software without restriction, including without limitation the rights\n" |
| 23196 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 23197 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 23198 | " * furnished to do so, subject to the following conditions:\n" |
| 23199 | " *\n" |
| 23200 | " * The above copyright notice and this permission notice shall be included in\n" |
| 23201 | " * all copies or substantial portions of the Software.\n" |
| 23202 | " *\n" |
| 23203 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 23204 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 23205 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 23206 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 23207 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 23208 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 23209 | " * THE SOFTWARE.\n" |
| 23210 | " *\n" |
| 23211 | " *===-----------------------------------------------------------------------===\n" |
| 23212 | " */\n" |
| 23213 | "\n" |
| 23214 | "#ifndef __MM_MALLOC_H\n" |
| 23215 | "#define __MM_MALLOC_H\n" |
| 23216 | "\n" |
| 23217 | "#include <stdlib.h>\n" |
| 23218 | "\n" |
| 23219 | "#ifdef _WIN32\n" |
| 23220 | "#include <malloc.h>\n" |
| 23221 | "#else\n" |
| 23222 | "#ifndef __cplusplus\n" |
| 23223 | "extern int posix_memalign(void **__memptr, size_t __alignment, size_t __size);\n" |
| 23224 | "#else\n" |
| 23225 | "// Some systems (e.g. those with GNU libc) declare posix_memalign with an\n" |
| 23226 | "// exception specifier. Via an \"egregious workaround\" in\n" |
| 23227 | "// Sema::CheckEquivalentExceptionSpec, Clang accepts the following as a valid\n" |
| 23228 | "// redeclaration of glibc's declaration.\n" |
| 23229 | "extern \"C\" int posix_memalign(void **__memptr, size_t __alignment, size_t __size);\n" |
| 23230 | "#endif\n" |
| 23231 | "#endif\n" |
| 23232 | "\n" |
| 23233 | "#if !(defined(_WIN32) && defined(_mm_malloc))\n" |
| 23234 | "static __inline__ void *__attribute__((__always_inline__, __nodebug__,\n" |
| 23235 | " __malloc__))\n" |
| 23236 | "_mm_malloc(size_t __size, size_t __align)\n" |
| 23237 | "{\n" |
| 23238 | " if (__align == 1) {\n" |
| 23239 | " return malloc(__size);\n" |
| 23240 | " }\n" |
| 23241 | "\n" |
| 23242 | " if (!(__align & (__align - 1)) && __align < sizeof(void *))\n" |
| 23243 | " __align = sizeof(void *);\n" |
| 23244 | "\n" |
| 23245 | " void *__mallocedMemory;\n" |
| 23246 | "#if defined(__MINGW32__)\n" |
| 23247 | " __mallocedMemory = __mingw_aligned_malloc(__size, __align);\n" |
| 23248 | "#elif defined(_WIN32)\n" |
| 23249 | " __mallocedMemory = _aligned_malloc(__size, __align);\n" |
| 23250 | "#else\n" |
| 23251 | " if (posix_memalign(&__mallocedMemory, __align, __size))\n" |
| 23252 | " return 0;\n" |
| 23253 | "#endif\n" |
| 23254 | "\n" |
| 23255 | " return __mallocedMemory;\n" |
| 23256 | "}\n" |
| 23257 | "\n" |
| 23258 | "static __inline__ void __attribute__((__always_inline__, __nodebug__))\n" |
| 23259 | "_mm_free(void *__p)\n" |
| 23260 | "{\n" |
| 23261 | " free(__p);\n" |
| 23262 | "}\n" |
| 23263 | "#endif\n" |
| 23264 | "\n" |
| 23265 | "#endif /* __MM_MALLOC_H */\n" |
| 23266 | "" } , |
| 23267 | { "/builtins/mmintrin.h" , "/*===---- mmintrin.h - MMX intrinsics --------------------------------------===\n" |
| 23268 | " *\n" |
| 23269 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 23270 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 23271 | " * in the Software without restriction, including without limitation the rights\n" |
| 23272 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 23273 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 23274 | " * furnished to do so, subject to the following conditions:\n" |
| 23275 | " *\n" |
| 23276 | " * The above copyright notice and this permission notice shall be included in\n" |
| 23277 | " * all copies or substantial portions of the Software.\n" |
| 23278 | " *\n" |
| 23279 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 23280 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 23281 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 23282 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 23283 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 23284 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 23285 | " * THE SOFTWARE.\n" |
| 23286 | " *\n" |
| 23287 | " *===-----------------------------------------------------------------------===\n" |
| 23288 | " */\n" |
| 23289 | "\n" |
| 23290 | "#ifndef __MMINTRIN_H\n" |
| 23291 | "#define __MMINTRIN_H\n" |
| 23292 | "\n" |
| 23293 | "typedef long long __m64 __attribute__((__vector_size__(8)));\n" |
| 23294 | "\n" |
| 23295 | "typedef long long __v1di __attribute__((__vector_size__(8)));\n" |
| 23296 | "typedef int __v2si __attribute__((__vector_size__(8)));\n" |
| 23297 | "typedef short __v4hi __attribute__((__vector_size__(8)));\n" |
| 23298 | "typedef char __v8qi __attribute__((__vector_size__(8)));\n" |
| 23299 | "\n" |
| 23300 | "/* Define the default attributes for the functions in this file. */\n" |
| 23301 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"mmx\"), __min_vector_width__(64)))\n" |
| 23302 | "\n" |
| 23303 | "/// Clears the MMX state by setting the state of the x87 stack registers\n" |
| 23304 | "/// to empty.\n" |
| 23305 | "///\n" |
| 23306 | "/// \\headerfile <x86intrin.h>\n" |
| 23307 | "///\n" |
| 23308 | "/// This intrinsic corresponds to the <c> EMMS </c> instruction.\n" |
| 23309 | "///\n" |
| 23310 | "static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__(\"mmx\")))\n" |
| 23311 | "_mm_empty(void)\n" |
| 23312 | "{\n" |
| 23313 | " __builtin_ia32_emms();\n" |
| 23314 | "}\n" |
| 23315 | "\n" |
| 23316 | "/// Constructs a 64-bit integer vector, setting the lower 32 bits to the\n" |
| 23317 | "/// value of the 32-bit integer parameter and setting the upper 32 bits to 0.\n" |
| 23318 | "///\n" |
| 23319 | "/// \\headerfile <x86intrin.h>\n" |
| 23320 | "///\n" |
| 23321 | "/// This intrinsic corresponds to the <c> MOVD </c> instruction.\n" |
| 23322 | "///\n" |
| 23323 | "/// \\param __i\n" |
| 23324 | "/// A 32-bit integer value.\n" |
| 23325 | "/// \\returns A 64-bit integer vector. The lower 32 bits contain the value of the\n" |
| 23326 | "/// parameter. The upper 32 bits are set to 0.\n" |
| 23327 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23328 | "_mm_cvtsi32_si64(int __i)\n" |
| 23329 | "{\n" |
| 23330 | " return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);\n" |
| 23331 | "}\n" |
| 23332 | "\n" |
| 23333 | "/// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit\n" |
| 23334 | "/// signed integer.\n" |
| 23335 | "///\n" |
| 23336 | "/// \\headerfile <x86intrin.h>\n" |
| 23337 | "///\n" |
| 23338 | "/// This intrinsic corresponds to the <c> MOVD </c> instruction.\n" |
| 23339 | "///\n" |
| 23340 | "/// \\param __m\n" |
| 23341 | "/// A 64-bit integer vector.\n" |
| 23342 | "/// \\returns A 32-bit signed integer value containing the lower 32 bits of the\n" |
| 23343 | "/// parameter.\n" |
| 23344 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 23345 | "_mm_cvtsi64_si32(__m64 __m)\n" |
| 23346 | "{\n" |
| 23347 | " return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0);\n" |
| 23348 | "}\n" |
| 23349 | "\n" |
| 23350 | "/// Casts a 64-bit signed integer value into a 64-bit integer vector.\n" |
| 23351 | "///\n" |
| 23352 | "/// \\headerfile <x86intrin.h>\n" |
| 23353 | "///\n" |
| 23354 | "/// This intrinsic corresponds to the <c> MOVQ </c> instruction.\n" |
| 23355 | "///\n" |
| 23356 | "/// \\param __i\n" |
| 23357 | "/// A 64-bit signed integer.\n" |
| 23358 | "/// \\returns A 64-bit integer vector containing the same bitwise pattern as the\n" |
| 23359 | "/// parameter.\n" |
| 23360 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23361 | "_mm_cvtsi64_m64(long long __i)\n" |
| 23362 | "{\n" |
| 23363 | " return (__m64)__i;\n" |
| 23364 | "}\n" |
| 23365 | "\n" |
| 23366 | "/// Casts a 64-bit integer vector into a 64-bit signed integer value.\n" |
| 23367 | "///\n" |
| 23368 | "/// \\headerfile <x86intrin.h>\n" |
| 23369 | "///\n" |
| 23370 | "/// This intrinsic corresponds to the <c> MOVQ </c> instruction.\n" |
| 23371 | "///\n" |
| 23372 | "/// \\param __m\n" |
| 23373 | "/// A 64-bit integer vector.\n" |
| 23374 | "/// \\returns A 64-bit signed integer containing the same bitwise pattern as the\n" |
| 23375 | "/// parameter.\n" |
| 23376 | "static __inline__ long long __DEFAULT_FN_ATTRS\n" |
| 23377 | "_mm_cvtm64_si64(__m64 __m)\n" |
| 23378 | "{\n" |
| 23379 | " return (long long)__m;\n" |
| 23380 | "}\n" |
| 23381 | "\n" |
| 23382 | "/// Converts 16-bit signed integers from both 64-bit integer vector\n" |
| 23383 | "/// parameters of [4 x i16] into 8-bit signed integer values, and constructs\n" |
| 23384 | "/// a 64-bit integer vector of [8 x i8] as the result. Positive values\n" |
| 23385 | "/// greater than 0x7F are saturated to 0x7F. Negative values less than 0x80\n" |
| 23386 | "/// are saturated to 0x80.\n" |
| 23387 | "///\n" |
| 23388 | "/// \\headerfile <x86intrin.h>\n" |
| 23389 | "///\n" |
| 23390 | "/// This intrinsic corresponds to the <c> PACKSSWB </c> instruction.\n" |
| 23391 | "///\n" |
| 23392 | "/// \\param __m1\n" |
| 23393 | "/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a\n" |
| 23394 | "/// 16-bit signed integer and is converted to an 8-bit signed integer with\n" |
| 23395 | "/// saturation. Positive values greater than 0x7F are saturated to 0x7F.\n" |
| 23396 | "/// Negative values less than 0x80 are saturated to 0x80. The converted\n" |
| 23397 | "/// [4 x i8] values are written to the lower 32 bits of the result.\n" |
| 23398 | "/// \\param __m2\n" |
| 23399 | "/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a\n" |
| 23400 | "/// 16-bit signed integer and is converted to an 8-bit signed integer with\n" |
| 23401 | "/// saturation. Positive values greater than 0x7F are saturated to 0x7F.\n" |
| 23402 | "/// Negative values less than 0x80 are saturated to 0x80. The converted\n" |
| 23403 | "/// [4 x i8] values are written to the upper 32 bits of the result.\n" |
| 23404 | "/// \\returns A 64-bit integer vector of [8 x i8] containing the converted\n" |
| 23405 | "/// values.\n" |
| 23406 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23407 | "_mm_packs_pi16(__m64 __m1, __m64 __m2)\n" |
| 23408 | "{\n" |
| 23409 | " return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);\n" |
| 23410 | "}\n" |
| 23411 | "\n" |
| 23412 | "/// Converts 32-bit signed integers from both 64-bit integer vector\n" |
| 23413 | "/// parameters of [2 x i32] into 16-bit signed integer values, and constructs\n" |
| 23414 | "/// a 64-bit integer vector of [4 x i16] as the result. Positive values\n" |
| 23415 | "/// greater than 0x7FFF are saturated to 0x7FFF. Negative values less than\n" |
| 23416 | "/// 0x8000 are saturated to 0x8000.\n" |
| 23417 | "///\n" |
| 23418 | "/// \\headerfile <x86intrin.h>\n" |
| 23419 | "///\n" |
| 23420 | "/// This intrinsic corresponds to the <c> PACKSSDW </c> instruction.\n" |
| 23421 | "///\n" |
| 23422 | "/// \\param __m1\n" |
| 23423 | "/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a\n" |
| 23424 | "/// 32-bit signed integer and is converted to a 16-bit signed integer with\n" |
| 23425 | "/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.\n" |
| 23426 | "/// Negative values less than 0x8000 are saturated to 0x8000. The converted\n" |
| 23427 | "/// [2 x i16] values are written to the lower 32 bits of the result.\n" |
| 23428 | "/// \\param __m2\n" |
| 23429 | "/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a\n" |
| 23430 | "/// 32-bit signed integer and is converted to a 16-bit signed integer with\n" |
| 23431 | "/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.\n" |
| 23432 | "/// Negative values less than 0x8000 are saturated to 0x8000. The converted\n" |
| 23433 | "/// [2 x i16] values are written to the upper 32 bits of the result.\n" |
| 23434 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the converted\n" |
| 23435 | "/// values.\n" |
| 23436 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23437 | "_mm_packs_pi32(__m64 __m1, __m64 __m2)\n" |
| 23438 | "{\n" |
| 23439 | " return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);\n" |
| 23440 | "}\n" |
| 23441 | "\n" |
| 23442 | "/// Converts 16-bit signed integers from both 64-bit integer vector\n" |
| 23443 | "/// parameters of [4 x i16] into 8-bit unsigned integer values, and\n" |
| 23444 | "/// constructs a 64-bit integer vector of [8 x i8] as the result. Values\n" |
| 23445 | "/// greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated\n" |
| 23446 | "/// to 0.\n" |
| 23447 | "///\n" |
| 23448 | "/// \\headerfile <x86intrin.h>\n" |
| 23449 | "///\n" |
| 23450 | "/// This intrinsic corresponds to the <c> PACKUSWB </c> instruction.\n" |
| 23451 | "///\n" |
| 23452 | "/// \\param __m1\n" |
| 23453 | "/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a\n" |
| 23454 | "/// 16-bit signed integer and is converted to an 8-bit unsigned integer with\n" |
| 23455 | "/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less\n" |
| 23456 | "/// than 0 are saturated to 0. The converted [4 x i8] values are written to\n" |
| 23457 | "/// the lower 32 bits of the result.\n" |
| 23458 | "/// \\param __m2\n" |
| 23459 | "/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a\n" |
| 23460 | "/// 16-bit signed integer and is converted to an 8-bit unsigned integer with\n" |
| 23461 | "/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less\n" |
| 23462 | "/// than 0 are saturated to 0. The converted [4 x i8] values are written to\n" |
| 23463 | "/// the upper 32 bits of the result.\n" |
| 23464 | "/// \\returns A 64-bit integer vector of [8 x i8] containing the converted\n" |
| 23465 | "/// values.\n" |
| 23466 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23467 | "_mm_packs_pu16(__m64 __m1, __m64 __m2)\n" |
| 23468 | "{\n" |
| 23469 | " return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);\n" |
| 23470 | "}\n" |
| 23471 | "\n" |
| 23472 | "/// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]\n" |
| 23473 | "/// and interleaves them into a 64-bit integer vector of [8 x i8].\n" |
| 23474 | "///\n" |
| 23475 | "/// \\headerfile <x86intrin.h>\n" |
| 23476 | "///\n" |
| 23477 | "/// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction.\n" |
| 23478 | "///\n" |
| 23479 | "/// \\param __m1\n" |
| 23480 | "/// A 64-bit integer vector of [8 x i8]. \\n\n" |
| 23481 | "/// Bits [39:32] are written to bits [7:0] of the result. \\n\n" |
| 23482 | "/// Bits [47:40] are written to bits [23:16] of the result. \\n\n" |
| 23483 | "/// Bits [55:48] are written to bits [39:32] of the result. \\n\n" |
| 23484 | "/// Bits [63:56] are written to bits [55:48] of the result.\n" |
| 23485 | "/// \\param __m2\n" |
| 23486 | "/// A 64-bit integer vector of [8 x i8].\n" |
| 23487 | "/// Bits [39:32] are written to bits [15:8] of the result. \\n\n" |
| 23488 | "/// Bits [47:40] are written to bits [31:24] of the result. \\n\n" |
| 23489 | "/// Bits [55:48] are written to bits [47:40] of the result. \\n\n" |
| 23490 | "/// Bits [63:56] are written to bits [63:56] of the result.\n" |
| 23491 | "/// \\returns A 64-bit integer vector of [8 x i8] containing the interleaved\n" |
| 23492 | "/// values.\n" |
| 23493 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23494 | "_mm_unpackhi_pi8(__m64 __m1, __m64 __m2)\n" |
| 23495 | "{\n" |
| 23496 | " return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);\n" |
| 23497 | "}\n" |
| 23498 | "\n" |
| 23499 | "/// Unpacks the upper 32 bits from two 64-bit integer vectors of\n" |
| 23500 | "/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].\n" |
| 23501 | "///\n" |
| 23502 | "/// \\headerfile <x86intrin.h>\n" |
| 23503 | "///\n" |
| 23504 | "/// This intrinsic corresponds to the <c> PUNPCKHWD </c> instruction.\n" |
| 23505 | "///\n" |
| 23506 | "/// \\param __m1\n" |
| 23507 | "/// A 64-bit integer vector of [4 x i16].\n" |
| 23508 | "/// Bits [47:32] are written to bits [15:0] of the result. \\n\n" |
| 23509 | "/// Bits [63:48] are written to bits [47:32] of the result.\n" |
| 23510 | "/// \\param __m2\n" |
| 23511 | "/// A 64-bit integer vector of [4 x i16].\n" |
| 23512 | "/// Bits [47:32] are written to bits [31:16] of the result. \\n\n" |
| 23513 | "/// Bits [63:48] are written to bits [63:48] of the result.\n" |
| 23514 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the interleaved\n" |
| 23515 | "/// values.\n" |
| 23516 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23517 | "_mm_unpackhi_pi16(__m64 __m1, __m64 __m2)\n" |
| 23518 | "{\n" |
| 23519 | " return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);\n" |
| 23520 | "}\n" |
| 23521 | "\n" |
| 23522 | "/// Unpacks the upper 32 bits from two 64-bit integer vectors of\n" |
| 23523 | "/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].\n" |
| 23524 | "///\n" |
| 23525 | "/// \\headerfile <x86intrin.h>\n" |
| 23526 | "///\n" |
| 23527 | "/// This intrinsic corresponds to the <c> PUNPCKHDQ </c> instruction.\n" |
| 23528 | "///\n" |
| 23529 | "/// \\param __m1\n" |
| 23530 | "/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to\n" |
| 23531 | "/// the lower 32 bits of the result.\n" |
| 23532 | "/// \\param __m2\n" |
| 23533 | "/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to\n" |
| 23534 | "/// the upper 32 bits of the result.\n" |
| 23535 | "/// \\returns A 64-bit integer vector of [2 x i32] containing the interleaved\n" |
| 23536 | "/// values.\n" |
| 23537 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23538 | "_mm_unpackhi_pi32(__m64 __m1, __m64 __m2)\n" |
| 23539 | "{\n" |
| 23540 | " return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);\n" |
| 23541 | "}\n" |
| 23542 | "\n" |
| 23543 | "/// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]\n" |
| 23544 | "/// and interleaves them into a 64-bit integer vector of [8 x i8].\n" |
| 23545 | "///\n" |
| 23546 | "/// \\headerfile <x86intrin.h>\n" |
| 23547 | "///\n" |
| 23548 | "/// This intrinsic corresponds to the <c> PUNPCKLBW </c> instruction.\n" |
| 23549 | "///\n" |
| 23550 | "/// \\param __m1\n" |
| 23551 | "/// A 64-bit integer vector of [8 x i8].\n" |
| 23552 | "/// Bits [7:0] are written to bits [7:0] of the result. \\n\n" |
| 23553 | "/// Bits [15:8] are written to bits [23:16] of the result. \\n\n" |
| 23554 | "/// Bits [23:16] are written to bits [39:32] of the result. \\n\n" |
| 23555 | "/// Bits [31:24] are written to bits [55:48] of the result.\n" |
| 23556 | "/// \\param __m2\n" |
| 23557 | "/// A 64-bit integer vector of [8 x i8].\n" |
| 23558 | "/// Bits [7:0] are written to bits [15:8] of the result. \\n\n" |
| 23559 | "/// Bits [15:8] are written to bits [31:24] of the result. \\n\n" |
| 23560 | "/// Bits [23:16] are written to bits [47:40] of the result. \\n\n" |
| 23561 | "/// Bits [31:24] are written to bits [63:56] of the result.\n" |
| 23562 | "/// \\returns A 64-bit integer vector of [8 x i8] containing the interleaved\n" |
| 23563 | "/// values.\n" |
| 23564 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23565 | "_mm_unpacklo_pi8(__m64 __m1, __m64 __m2)\n" |
| 23566 | "{\n" |
| 23567 | " return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);\n" |
| 23568 | "}\n" |
| 23569 | "\n" |
| 23570 | "/// Unpacks the lower 32 bits from two 64-bit integer vectors of\n" |
| 23571 | "/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].\n" |
| 23572 | "///\n" |
| 23573 | "/// \\headerfile <x86intrin.h>\n" |
| 23574 | "///\n" |
| 23575 | "/// This intrinsic corresponds to the <c> PUNPCKLWD </c> instruction.\n" |
| 23576 | "///\n" |
| 23577 | "/// \\param __m1\n" |
| 23578 | "/// A 64-bit integer vector of [4 x i16].\n" |
| 23579 | "/// Bits [15:0] are written to bits [15:0] of the result. \\n\n" |
| 23580 | "/// Bits [31:16] are written to bits [47:32] of the result.\n" |
| 23581 | "/// \\param __m2\n" |
| 23582 | "/// A 64-bit integer vector of [4 x i16].\n" |
| 23583 | "/// Bits [15:0] are written to bits [31:16] of the result. \\n\n" |
| 23584 | "/// Bits [31:16] are written to bits [63:48] of the result.\n" |
| 23585 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the interleaved\n" |
| 23586 | "/// values.\n" |
| 23587 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23588 | "_mm_unpacklo_pi16(__m64 __m1, __m64 __m2)\n" |
| 23589 | "{\n" |
| 23590 | " return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);\n" |
| 23591 | "}\n" |
| 23592 | "\n" |
| 23593 | "/// Unpacks the lower 32 bits from two 64-bit integer vectors of\n" |
| 23594 | "/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].\n" |
| 23595 | "///\n" |
| 23596 | "/// \\headerfile <x86intrin.h>\n" |
| 23597 | "///\n" |
| 23598 | "/// This intrinsic corresponds to the <c> PUNPCKLDQ </c> instruction.\n" |
| 23599 | "///\n" |
| 23600 | "/// \\param __m1\n" |
| 23601 | "/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to\n" |
| 23602 | "/// the lower 32 bits of the result.\n" |
| 23603 | "/// \\param __m2\n" |
| 23604 | "/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to\n" |
| 23605 | "/// the upper 32 bits of the result.\n" |
| 23606 | "/// \\returns A 64-bit integer vector of [2 x i32] containing the interleaved\n" |
| 23607 | "/// values.\n" |
| 23608 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23609 | "_mm_unpacklo_pi32(__m64 __m1, __m64 __m2)\n" |
| 23610 | "{\n" |
| 23611 | " return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);\n" |
| 23612 | "}\n" |
| 23613 | "\n" |
| 23614 | "/// Adds each 8-bit integer element of the first 64-bit integer vector\n" |
| 23615 | "/// of [8 x i8] to the corresponding 8-bit integer element of the second\n" |
| 23616 | "/// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are\n" |
| 23617 | "/// packed into a 64-bit integer vector of [8 x i8].\n" |
| 23618 | "///\n" |
| 23619 | "/// \\headerfile <x86intrin.h>\n" |
| 23620 | "///\n" |
| 23621 | "/// This intrinsic corresponds to the <c> PADDB </c> instruction.\n" |
| 23622 | "///\n" |
| 23623 | "/// \\param __m1\n" |
| 23624 | "/// A 64-bit integer vector of [8 x i8].\n" |
| 23625 | "/// \\param __m2\n" |
| 23626 | "/// A 64-bit integer vector of [8 x i8].\n" |
| 23627 | "/// \\returns A 64-bit integer vector of [8 x i8] containing the sums of both\n" |
| 23628 | "/// parameters.\n" |
| 23629 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23630 | "_mm_add_pi8(__m64 __m1, __m64 __m2)\n" |
| 23631 | "{\n" |
| 23632 | " return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);\n" |
| 23633 | "}\n" |
| 23634 | "\n" |
| 23635 | "/// Adds each 16-bit integer element of the first 64-bit integer vector\n" |
| 23636 | "/// of [4 x i16] to the corresponding 16-bit integer element of the second\n" |
| 23637 | "/// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are\n" |
| 23638 | "/// packed into a 64-bit integer vector of [4 x i16].\n" |
| 23639 | "///\n" |
| 23640 | "/// \\headerfile <x86intrin.h>\n" |
| 23641 | "///\n" |
| 23642 | "/// This intrinsic corresponds to the <c> PADDW </c> instruction.\n" |
| 23643 | "///\n" |
| 23644 | "/// \\param __m1\n" |
| 23645 | "/// A 64-bit integer vector of [4 x i16].\n" |
| 23646 | "/// \\param __m2\n" |
| 23647 | "/// A 64-bit integer vector of [4 x i16].\n" |
| 23648 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the sums of both\n" |
| 23649 | "/// parameters.\n" |
| 23650 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23651 | "_mm_add_pi16(__m64 __m1, __m64 __m2)\n" |
| 23652 | "{\n" |
| 23653 | " return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);\n" |
| 23654 | "}\n" |
| 23655 | "\n" |
| 23656 | "/// Adds each 32-bit integer element of the first 64-bit integer vector\n" |
| 23657 | "/// of [2 x i32] to the corresponding 32-bit integer element of the second\n" |
| 23658 | "/// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are\n" |
| 23659 | "/// packed into a 64-bit integer vector of [2 x i32].\n" |
| 23660 | "///\n" |
| 23661 | "/// \\headerfile <x86intrin.h>\n" |
| 23662 | "///\n" |
| 23663 | "/// This intrinsic corresponds to the <c> PADDD </c> instruction.\n" |
| 23664 | "///\n" |
| 23665 | "/// \\param __m1\n" |
| 23666 | "/// A 64-bit integer vector of [2 x i32].\n" |
| 23667 | "/// \\param __m2\n" |
| 23668 | "/// A 64-bit integer vector of [2 x i32].\n" |
| 23669 | "/// \\returns A 64-bit integer vector of [2 x i32] containing the sums of both\n" |
| 23670 | "/// parameters.\n" |
| 23671 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23672 | "_mm_add_pi32(__m64 __m1, __m64 __m2)\n" |
| 23673 | "{\n" |
| 23674 | " return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);\n" |
| 23675 | "}\n" |
| 23676 | "\n" |
| 23677 | "/// Adds each 8-bit signed integer element of the first 64-bit integer\n" |
| 23678 | "/// vector of [8 x i8] to the corresponding 8-bit signed integer element of\n" |
| 23679 | "/// the second 64-bit integer vector of [8 x i8]. Positive sums greater than\n" |
| 23680 | "/// 0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to\n" |
| 23681 | "/// 0x80. The results are packed into a 64-bit integer vector of [8 x i8].\n" |
| 23682 | "///\n" |
| 23683 | "/// \\headerfile <x86intrin.h>\n" |
| 23684 | "///\n" |
| 23685 | "/// This intrinsic corresponds to the <c> PADDSB </c> instruction.\n" |
| 23686 | "///\n" |
| 23687 | "/// \\param __m1\n" |
| 23688 | "/// A 64-bit integer vector of [8 x i8].\n" |
| 23689 | "/// \\param __m2\n" |
| 23690 | "/// A 64-bit integer vector of [8 x i8].\n" |
| 23691 | "/// \\returns A 64-bit integer vector of [8 x i8] containing the saturated sums\n" |
| 23692 | "/// of both parameters.\n" |
| 23693 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23694 | "_mm_adds_pi8(__m64 __m1, __m64 __m2)\n" |
| 23695 | "{\n" |
| 23696 | " return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);\n" |
| 23697 | "}\n" |
| 23698 | "\n" |
| 23699 | "/// Adds each 16-bit signed integer element of the first 64-bit integer\n" |
| 23700 | "/// vector of [4 x i16] to the corresponding 16-bit signed integer element of\n" |
| 23701 | "/// the second 64-bit integer vector of [4 x i16]. Positive sums greater than\n" |
| 23702 | "/// 0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are\n" |
| 23703 | "/// saturated to 0x8000. The results are packed into a 64-bit integer vector\n" |
| 23704 | "/// of [4 x i16].\n" |
| 23705 | "///\n" |
| 23706 | "/// \\headerfile <x86intrin.h>\n" |
| 23707 | "///\n" |
| 23708 | "/// This intrinsic corresponds to the <c> PADDSW </c> instruction.\n" |
| 23709 | "///\n" |
| 23710 | "/// \\param __m1\n" |
| 23711 | "/// A 64-bit integer vector of [4 x i16].\n" |
| 23712 | "/// \\param __m2\n" |
| 23713 | "/// A 64-bit integer vector of [4 x i16].\n" |
| 23714 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the saturated sums\n" |
| 23715 | "/// of both parameters.\n" |
| 23716 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23717 | "_mm_adds_pi16(__m64 __m1, __m64 __m2)\n" |
| 23718 | "{\n" |
| 23719 | " return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);\n" |
| 23720 | "}\n" |
| 23721 | "\n" |
| 23722 | "/// Adds each 8-bit unsigned integer element of the first 64-bit integer\n" |
| 23723 | "/// vector of [8 x i8] to the corresponding 8-bit unsigned integer element of\n" |
| 23724 | "/// the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are\n" |
| 23725 | "/// saturated to 0xFF. The results are packed into a 64-bit integer vector of\n" |
| 23726 | "/// [8 x i8].\n" |
| 23727 | "///\n" |
| 23728 | "/// \\headerfile <x86intrin.h>\n" |
| 23729 | "///\n" |
| 23730 | "/// This intrinsic corresponds to the <c> PADDUSB </c> instruction.\n" |
| 23731 | "///\n" |
| 23732 | "/// \\param __m1\n" |
| 23733 | "/// A 64-bit integer vector of [8 x i8].\n" |
| 23734 | "/// \\param __m2\n" |
| 23735 | "/// A 64-bit integer vector of [8 x i8].\n" |
| 23736 | "/// \\returns A 64-bit integer vector of [8 x i8] containing the saturated\n" |
| 23737 | "/// unsigned sums of both parameters.\n" |
| 23738 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23739 | "_mm_adds_pu8(__m64 __m1, __m64 __m2)\n" |
| 23740 | "{\n" |
| 23741 | " return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);\n" |
| 23742 | "}\n" |
| 23743 | "\n" |
| 23744 | "/// Adds each 16-bit unsigned integer element of the first 64-bit integer\n" |
| 23745 | "/// vector of [4 x i16] to the corresponding 16-bit unsigned integer element\n" |
| 23746 | "/// of the second 64-bit integer vector of [4 x i16]. Sums greater than\n" |
| 23747 | "/// 0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit\n" |
| 23748 | "/// integer vector of [4 x i16].\n" |
| 23749 | "///\n" |
| 23750 | "/// \\headerfile <x86intrin.h>\n" |
| 23751 | "///\n" |
| 23752 | "/// This intrinsic corresponds to the <c> PADDUSW </c> instruction.\n" |
| 23753 | "///\n" |
| 23754 | "/// \\param __m1\n" |
| 23755 | "/// A 64-bit integer vector of [4 x i16].\n" |
| 23756 | "/// \\param __m2\n" |
| 23757 | "/// A 64-bit integer vector of [4 x i16].\n" |
| 23758 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the saturated\n" |
| 23759 | "/// unsigned sums of both parameters.\n" |
| 23760 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23761 | "_mm_adds_pu16(__m64 __m1, __m64 __m2)\n" |
| 23762 | "{\n" |
| 23763 | " return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);\n" |
| 23764 | "}\n" |
| 23765 | "\n" |
| 23766 | "/// Subtracts each 8-bit integer element of the second 64-bit integer\n" |
| 23767 | "/// vector of [8 x i8] from the corresponding 8-bit integer element of the\n" |
| 23768 | "/// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results\n" |
| 23769 | "/// are packed into a 64-bit integer vector of [8 x i8].\n" |
| 23770 | "///\n" |
| 23771 | "/// \\headerfile <x86intrin.h>\n" |
| 23772 | "///\n" |
| 23773 | "/// This intrinsic corresponds to the <c> PSUBB </c> instruction.\n" |
| 23774 | "///\n" |
| 23775 | "/// \\param __m1\n" |
| 23776 | "/// A 64-bit integer vector of [8 x i8] containing the minuends.\n" |
| 23777 | "/// \\param __m2\n" |
| 23778 | "/// A 64-bit integer vector of [8 x i8] containing the subtrahends.\n" |
| 23779 | "/// \\returns A 64-bit integer vector of [8 x i8] containing the differences of\n" |
| 23780 | "/// both parameters.\n" |
| 23781 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23782 | "_mm_sub_pi8(__m64 __m1, __m64 __m2)\n" |
| 23783 | "{\n" |
| 23784 | " return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);\n" |
| 23785 | "}\n" |
| 23786 | "\n" |
| 23787 | "/// Subtracts each 16-bit integer element of the second 64-bit integer\n" |
| 23788 | "/// vector of [4 x i16] from the corresponding 16-bit integer element of the\n" |
| 23789 | "/// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the\n" |
| 23790 | "/// results are packed into a 64-bit integer vector of [4 x i16].\n" |
| 23791 | "///\n" |
| 23792 | "/// \\headerfile <x86intrin.h>\n" |
| 23793 | "///\n" |
| 23794 | "/// This intrinsic corresponds to the <c> PSUBW </c> instruction.\n" |
| 23795 | "///\n" |
| 23796 | "/// \\param __m1\n" |
| 23797 | "/// A 64-bit integer vector of [4 x i16] containing the minuends.\n" |
| 23798 | "/// \\param __m2\n" |
| 23799 | "/// A 64-bit integer vector of [4 x i16] containing the subtrahends.\n" |
| 23800 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the differences of\n" |
| 23801 | "/// both parameters.\n" |
| 23802 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23803 | "_mm_sub_pi16(__m64 __m1, __m64 __m2)\n" |
| 23804 | "{\n" |
| 23805 | " return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);\n" |
| 23806 | "}\n" |
| 23807 | "\n" |
| 23808 | "/// Subtracts each 32-bit integer element of the second 64-bit integer\n" |
| 23809 | "/// vector of [2 x i32] from the corresponding 32-bit integer element of the\n" |
| 23810 | "/// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the\n" |
| 23811 | "/// results are packed into a 64-bit integer vector of [2 x i32].\n" |
| 23812 | "///\n" |
| 23813 | "/// \\headerfile <x86intrin.h>\n" |
| 23814 | "///\n" |
| 23815 | "/// This intrinsic corresponds to the <c> PSUBD </c> instruction.\n" |
| 23816 | "///\n" |
| 23817 | "/// \\param __m1\n" |
| 23818 | "/// A 64-bit integer vector of [2 x i32] containing the minuends.\n" |
| 23819 | "/// \\param __m2\n" |
| 23820 | "/// A 64-bit integer vector of [2 x i32] containing the subtrahends.\n" |
| 23821 | "/// \\returns A 64-bit integer vector of [2 x i32] containing the differences of\n" |
| 23822 | "/// both parameters.\n" |
| 23823 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23824 | "_mm_sub_pi32(__m64 __m1, __m64 __m2)\n" |
| 23825 | "{\n" |
| 23826 | " return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);\n" |
| 23827 | "}\n" |
| 23828 | "\n" |
| 23829 | "/// Subtracts each 8-bit signed integer element of the second 64-bit\n" |
| 23830 | "/// integer vector of [8 x i8] from the corresponding 8-bit signed integer\n" |
| 23831 | "/// element of the first 64-bit integer vector of [8 x i8]. Positive results\n" |
| 23832 | "/// greater than 0x7F are saturated to 0x7F. Negative results less than 0x80\n" |
| 23833 | "/// are saturated to 0x80. The results are packed into a 64-bit integer\n" |
| 23834 | "/// vector of [8 x i8].\n" |
| 23835 | "///\n" |
| 23836 | "/// \\headerfile <x86intrin.h>\n" |
| 23837 | "///\n" |
| 23838 | "/// This intrinsic corresponds to the <c> PSUBSB </c> instruction.\n" |
| 23839 | "///\n" |
| 23840 | "/// \\param __m1\n" |
| 23841 | "/// A 64-bit integer vector of [8 x i8] containing the minuends.\n" |
| 23842 | "/// \\param __m2\n" |
| 23843 | "/// A 64-bit integer vector of [8 x i8] containing the subtrahends.\n" |
| 23844 | "/// \\returns A 64-bit integer vector of [8 x i8] containing the saturated\n" |
| 23845 | "/// differences of both parameters.\n" |
| 23846 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23847 | "_mm_subs_pi8(__m64 __m1, __m64 __m2)\n" |
| 23848 | "{\n" |
| 23849 | " return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);\n" |
| 23850 | "}\n" |
| 23851 | "\n" |
| 23852 | "/// Subtracts each 16-bit signed integer element of the second 64-bit\n" |
| 23853 | "/// integer vector of [4 x i16] from the corresponding 16-bit signed integer\n" |
| 23854 | "/// element of the first 64-bit integer vector of [4 x i16]. Positive results\n" |
| 23855 | "/// greater than 0x7FFF are saturated to 0x7FFF. Negative results less than\n" |
| 23856 | "/// 0x8000 are saturated to 0x8000. The results are packed into a 64-bit\n" |
| 23857 | "/// integer vector of [4 x i16].\n" |
| 23858 | "///\n" |
| 23859 | "/// \\headerfile <x86intrin.h>\n" |
| 23860 | "///\n" |
| 23861 | "/// This intrinsic corresponds to the <c> PSUBSW </c> instruction.\n" |
| 23862 | "///\n" |
| 23863 | "/// \\param __m1\n" |
| 23864 | "/// A 64-bit integer vector of [4 x i16] containing the minuends.\n" |
| 23865 | "/// \\param __m2\n" |
| 23866 | "/// A 64-bit integer vector of [4 x i16] containing the subtrahends.\n" |
| 23867 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the saturated\n" |
| 23868 | "/// differences of both parameters.\n" |
| 23869 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23870 | "_mm_subs_pi16(__m64 __m1, __m64 __m2)\n" |
| 23871 | "{\n" |
| 23872 | " return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);\n" |
| 23873 | "}\n" |
| 23874 | "\n" |
| 23875 | "/// Subtracts each 8-bit unsigned integer element of the second 64-bit\n" |
| 23876 | "/// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer\n" |
| 23877 | "/// element of the first 64-bit integer vector of [8 x i8].\n" |
| 23878 | "///\n" |
| 23879 | "/// If an element of the first vector is less than the corresponding element\n" |
| 23880 | "/// of the second vector, the result is saturated to 0. The results are\n" |
| 23881 | "/// packed into a 64-bit integer vector of [8 x i8].\n" |
| 23882 | "///\n" |
| 23883 | "/// \\headerfile <x86intrin.h>\n" |
| 23884 | "///\n" |
| 23885 | "/// This intrinsic corresponds to the <c> PSUBUSB </c> instruction.\n" |
| 23886 | "///\n" |
| 23887 | "/// \\param __m1\n" |
| 23888 | "/// A 64-bit integer vector of [8 x i8] containing the minuends.\n" |
| 23889 | "/// \\param __m2\n" |
| 23890 | "/// A 64-bit integer vector of [8 x i8] containing the subtrahends.\n" |
| 23891 | "/// \\returns A 64-bit integer vector of [8 x i8] containing the saturated\n" |
| 23892 | "/// differences of both parameters.\n" |
| 23893 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23894 | "_mm_subs_pu8(__m64 __m1, __m64 __m2)\n" |
| 23895 | "{\n" |
| 23896 | " return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);\n" |
| 23897 | "}\n" |
| 23898 | "\n" |
| 23899 | "/// Subtracts each 16-bit unsigned integer element of the second 64-bit\n" |
| 23900 | "/// integer vector of [4 x i16] from the corresponding 16-bit unsigned\n" |
| 23901 | "/// integer element of the first 64-bit integer vector of [4 x i16].\n" |
| 23902 | "///\n" |
| 23903 | "/// If an element of the first vector is less than the corresponding element\n" |
| 23904 | "/// of the second vector, the result is saturated to 0. The results are\n" |
| 23905 | "/// packed into a 64-bit integer vector of [4 x i16].\n" |
| 23906 | "///\n" |
| 23907 | "/// \\headerfile <x86intrin.h>\n" |
| 23908 | "///\n" |
| 23909 | "/// This intrinsic corresponds to the <c> PSUBUSW </c> instruction.\n" |
| 23910 | "///\n" |
| 23911 | "/// \\param __m1\n" |
| 23912 | "/// A 64-bit integer vector of [4 x i16] containing the minuends.\n" |
| 23913 | "/// \\param __m2\n" |
| 23914 | "/// A 64-bit integer vector of [4 x i16] containing the subtrahends.\n" |
| 23915 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the saturated\n" |
| 23916 | "/// differences of both parameters.\n" |
| 23917 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23918 | "_mm_subs_pu16(__m64 __m1, __m64 __m2)\n" |
| 23919 | "{\n" |
| 23920 | " return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);\n" |
| 23921 | "}\n" |
| 23922 | "\n" |
| 23923 | "/// Multiplies each 16-bit signed integer element of the first 64-bit\n" |
| 23924 | "/// integer vector of [4 x i16] by the corresponding 16-bit signed integer\n" |
| 23925 | "/// element of the second 64-bit integer vector of [4 x i16] and get four\n" |
| 23926 | "/// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums.\n" |
| 23927 | "/// The lower 32 bits of these two sums are packed into a 64-bit integer\n" |
| 23928 | "/// vector of [2 x i32].\n" |
| 23929 | "///\n" |
| 23930 | "/// For example, bits [15:0] of both parameters are multiplied, bits [31:16]\n" |
| 23931 | "/// of both parameters are multiplied, and the sum of both results is written\n" |
| 23932 | "/// to bits [31:0] of the result.\n" |
| 23933 | "///\n" |
| 23934 | "/// \\headerfile <x86intrin.h>\n" |
| 23935 | "///\n" |
| 23936 | "/// This intrinsic corresponds to the <c> PMADDWD </c> instruction.\n" |
| 23937 | "///\n" |
| 23938 | "/// \\param __m1\n" |
| 23939 | "/// A 64-bit integer vector of [4 x i16].\n" |
| 23940 | "/// \\param __m2\n" |
| 23941 | "/// A 64-bit integer vector of [4 x i16].\n" |
| 23942 | "/// \\returns A 64-bit integer vector of [2 x i32] containing the sums of\n" |
| 23943 | "/// products of both parameters.\n" |
| 23944 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23945 | "_mm_madd_pi16(__m64 __m1, __m64 __m2)\n" |
| 23946 | "{\n" |
| 23947 | " return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);\n" |
| 23948 | "}\n" |
| 23949 | "\n" |
| 23950 | "/// Multiplies each 16-bit signed integer element of the first 64-bit\n" |
| 23951 | "/// integer vector of [4 x i16] by the corresponding 16-bit signed integer\n" |
| 23952 | "/// element of the second 64-bit integer vector of [4 x i16]. Packs the upper\n" |
| 23953 | "/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].\n" |
| 23954 | "///\n" |
| 23955 | "/// \\headerfile <x86intrin.h>\n" |
| 23956 | "///\n" |
| 23957 | "/// This intrinsic corresponds to the <c> PMULHW </c> instruction.\n" |
| 23958 | "///\n" |
| 23959 | "/// \\param __m1\n" |
| 23960 | "/// A 64-bit integer vector of [4 x i16].\n" |
| 23961 | "/// \\param __m2\n" |
| 23962 | "/// A 64-bit integer vector of [4 x i16].\n" |
| 23963 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits\n" |
| 23964 | "/// of the products of both parameters.\n" |
| 23965 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23966 | "_mm_mulhi_pi16(__m64 __m1, __m64 __m2)\n" |
| 23967 | "{\n" |
| 23968 | " return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);\n" |
| 23969 | "}\n" |
| 23970 | "\n" |
| 23971 | "/// Multiplies each 16-bit signed integer element of the first 64-bit\n" |
| 23972 | "/// integer vector of [4 x i16] by the corresponding 16-bit signed integer\n" |
| 23973 | "/// element of the second 64-bit integer vector of [4 x i16]. Packs the lower\n" |
| 23974 | "/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].\n" |
| 23975 | "///\n" |
| 23976 | "/// \\headerfile <x86intrin.h>\n" |
| 23977 | "///\n" |
| 23978 | "/// This intrinsic corresponds to the <c> PMULLW </c> instruction.\n" |
| 23979 | "///\n" |
| 23980 | "/// \\param __m1\n" |
| 23981 | "/// A 64-bit integer vector of [4 x i16].\n" |
| 23982 | "/// \\param __m2\n" |
| 23983 | "/// A 64-bit integer vector of [4 x i16].\n" |
| 23984 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits\n" |
| 23985 | "/// of the products of both parameters.\n" |
| 23986 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 23987 | "_mm_mullo_pi16(__m64 __m1, __m64 __m2)\n" |
| 23988 | "{\n" |
| 23989 | " return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);\n" |
| 23990 | "}\n" |
| 23991 | "\n" |
| 23992 | "/// Left-shifts each 16-bit signed integer element of the first\n" |
| 23993 | "/// parameter, which is a 64-bit integer vector of [4 x i16], by the number\n" |
| 23994 | "/// of bits specified by the second parameter, which is a 64-bit integer. The\n" |
| 23995 | "/// lower 16 bits of the results are packed into a 64-bit integer vector of\n" |
| 23996 | "/// [4 x i16].\n" |
| 23997 | "///\n" |
| 23998 | "/// \\headerfile <x86intrin.h>\n" |
| 23999 | "///\n" |
| 24000 | "/// This intrinsic corresponds to the <c> PSLLW </c> instruction.\n" |
| 24001 | "///\n" |
| 24002 | "/// \\param __m\n" |
| 24003 | "/// A 64-bit integer vector of [4 x i16].\n" |
| 24004 | "/// \\param __count\n" |
| 24005 | "/// A 64-bit integer vector interpreted as a single 64-bit integer.\n" |
| 24006 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the left-shifted\n" |
| 24007 | "/// values. If \\a __count is greater or equal to 16, the result is set to all\n" |
| 24008 | "/// 0.\n" |
| 24009 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24010 | "_mm_sll_pi16(__m64 __m, __m64 __count)\n" |
| 24011 | "{\n" |
| 24012 | " return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count);\n" |
| 24013 | "}\n" |
| 24014 | "\n" |
| 24015 | "/// Left-shifts each 16-bit signed integer element of a 64-bit integer\n" |
| 24016 | "/// vector of [4 x i16] by the number of bits specified by a 32-bit integer.\n" |
| 24017 | "/// The lower 16 bits of the results are packed into a 64-bit integer vector\n" |
| 24018 | "/// of [4 x i16].\n" |
| 24019 | "///\n" |
| 24020 | "/// \\headerfile <x86intrin.h>\n" |
| 24021 | "///\n" |
| 24022 | "/// This intrinsic corresponds to the <c> PSLLW </c> instruction.\n" |
| 24023 | "///\n" |
| 24024 | "/// \\param __m\n" |
| 24025 | "/// A 64-bit integer vector of [4 x i16].\n" |
| 24026 | "/// \\param __count\n" |
| 24027 | "/// A 32-bit integer value.\n" |
| 24028 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the left-shifted\n" |
| 24029 | "/// values. If \\a __count is greater or equal to 16, the result is set to all\n" |
| 24030 | "/// 0.\n" |
| 24031 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24032 | "_mm_slli_pi16(__m64 __m, int __count)\n" |
| 24033 | "{\n" |
| 24034 | " return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);\n" |
| 24035 | "}\n" |
| 24036 | "\n" |
| 24037 | "/// Left-shifts each 32-bit signed integer element of the first\n" |
| 24038 | "/// parameter, which is a 64-bit integer vector of [2 x i32], by the number\n" |
| 24039 | "/// of bits specified by the second parameter, which is a 64-bit integer. The\n" |
| 24040 | "/// lower 32 bits of the results are packed into a 64-bit integer vector of\n" |
| 24041 | "/// [2 x i32].\n" |
| 24042 | "///\n" |
| 24043 | "/// \\headerfile <x86intrin.h>\n" |
| 24044 | "///\n" |
| 24045 | "/// This intrinsic corresponds to the <c> PSLLD </c> instruction.\n" |
| 24046 | "///\n" |
| 24047 | "/// \\param __m\n" |
| 24048 | "/// A 64-bit integer vector of [2 x i32].\n" |
| 24049 | "/// \\param __count\n" |
| 24050 | "/// A 64-bit integer vector interpreted as a single 64-bit integer.\n" |
| 24051 | "/// \\returns A 64-bit integer vector of [2 x i32] containing the left-shifted\n" |
| 24052 | "/// values. If \\a __count is greater or equal to 32, the result is set to all\n" |
| 24053 | "/// 0.\n" |
| 24054 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24055 | "_mm_sll_pi32(__m64 __m, __m64 __count)\n" |
| 24056 | "{\n" |
| 24057 | " return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);\n" |
| 24058 | "}\n" |
| 24059 | "\n" |
| 24060 | "/// Left-shifts each 32-bit signed integer element of a 64-bit integer\n" |
| 24061 | "/// vector of [2 x i32] by the number of bits specified by a 32-bit integer.\n" |
| 24062 | "/// The lower 32 bits of the results are packed into a 64-bit integer vector\n" |
| 24063 | "/// of [2 x i32].\n" |
| 24064 | "///\n" |
| 24065 | "/// \\headerfile <x86intrin.h>\n" |
| 24066 | "///\n" |
| 24067 | "/// This intrinsic corresponds to the <c> PSLLD </c> instruction.\n" |
| 24068 | "///\n" |
| 24069 | "/// \\param __m\n" |
| 24070 | "/// A 64-bit integer vector of [2 x i32].\n" |
| 24071 | "/// \\param __count\n" |
| 24072 | "/// A 32-bit integer value.\n" |
| 24073 | "/// \\returns A 64-bit integer vector of [2 x i32] containing the left-shifted\n" |
| 24074 | "/// values. If \\a __count is greater or equal to 32, the result is set to all\n" |
| 24075 | "/// 0.\n" |
| 24076 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24077 | "_mm_slli_pi32(__m64 __m, int __count)\n" |
| 24078 | "{\n" |
| 24079 | " return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);\n" |
| 24080 | "}\n" |
| 24081 | "\n" |
| 24082 | "/// Left-shifts the first 64-bit integer parameter by the number of bits\n" |
| 24083 | "/// specified by the second 64-bit integer parameter. The lower 64 bits of\n" |
| 24084 | "/// result are returned.\n" |
| 24085 | "///\n" |
| 24086 | "/// \\headerfile <x86intrin.h>\n" |
| 24087 | "///\n" |
| 24088 | "/// This intrinsic corresponds to the <c> PSLLQ </c> instruction.\n" |
| 24089 | "///\n" |
| 24090 | "/// \\param __m\n" |
| 24091 | "/// A 64-bit integer vector interpreted as a single 64-bit integer.\n" |
| 24092 | "/// \\param __count\n" |
| 24093 | "/// A 64-bit integer vector interpreted as a single 64-bit integer.\n" |
| 24094 | "/// \\returns A 64-bit integer vector containing the left-shifted value. If\n" |
| 24095 | "/// \\a __count is greater or equal to 64, the result is set to 0.\n" |
| 24096 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24097 | "_mm_sll_si64(__m64 __m, __m64 __count)\n" |
| 24098 | "{\n" |
| 24099 | " return (__m64)__builtin_ia32_psllq((__v1di)__m, __count);\n" |
| 24100 | "}\n" |
| 24101 | "\n" |
| 24102 | "/// Left-shifts the first parameter, which is a 64-bit integer, by the\n" |
| 24103 | "/// number of bits specified by the second parameter, which is a 32-bit\n" |
| 24104 | "/// integer. The lower 64 bits of result are returned.\n" |
| 24105 | "///\n" |
| 24106 | "/// \\headerfile <x86intrin.h>\n" |
| 24107 | "///\n" |
| 24108 | "/// This intrinsic corresponds to the <c> PSLLQ </c> instruction.\n" |
| 24109 | "///\n" |
| 24110 | "/// \\param __m\n" |
| 24111 | "/// A 64-bit integer vector interpreted as a single 64-bit integer.\n" |
| 24112 | "/// \\param __count\n" |
| 24113 | "/// A 32-bit integer value.\n" |
| 24114 | "/// \\returns A 64-bit integer vector containing the left-shifted value. If\n" |
| 24115 | "/// \\a __count is greater or equal to 64, the result is set to 0.\n" |
| 24116 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24117 | "_mm_slli_si64(__m64 __m, int __count)\n" |
| 24118 | "{\n" |
| 24119 | " return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count);\n" |
| 24120 | "}\n" |
| 24121 | "\n" |
| 24122 | "/// Right-shifts each 16-bit integer element of the first parameter,\n" |
| 24123 | "/// which is a 64-bit integer vector of [4 x i16], by the number of bits\n" |
| 24124 | "/// specified by the second parameter, which is a 64-bit integer.\n" |
| 24125 | "///\n" |
| 24126 | "/// High-order bits are filled with the sign bit of the initial value of each\n" |
| 24127 | "/// 16-bit element. The 16-bit results are packed into a 64-bit integer\n" |
| 24128 | "/// vector of [4 x i16].\n" |
| 24129 | "///\n" |
| 24130 | "/// \\headerfile <x86intrin.h>\n" |
| 24131 | "///\n" |
| 24132 | "/// This intrinsic corresponds to the <c> PSRAW </c> instruction.\n" |
| 24133 | "///\n" |
| 24134 | "/// \\param __m\n" |
| 24135 | "/// A 64-bit integer vector of [4 x i16].\n" |
| 24136 | "/// \\param __count\n" |
| 24137 | "/// A 64-bit integer vector interpreted as a single 64-bit integer.\n" |
| 24138 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the right-shifted\n" |
| 24139 | "/// values.\n" |
| 24140 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24141 | "_mm_sra_pi16(__m64 __m, __m64 __count)\n" |
| 24142 | "{\n" |
| 24143 | " return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);\n" |
| 24144 | "}\n" |
| 24145 | "\n" |
| 24146 | "/// Right-shifts each 16-bit integer element of a 64-bit integer vector\n" |
| 24147 | "/// of [4 x i16] by the number of bits specified by a 32-bit integer.\n" |
| 24148 | "///\n" |
| 24149 | "/// High-order bits are filled with the sign bit of the initial value of each\n" |
| 24150 | "/// 16-bit element. The 16-bit results are packed into a 64-bit integer\n" |
| 24151 | "/// vector of [4 x i16].\n" |
| 24152 | "///\n" |
| 24153 | "/// \\headerfile <x86intrin.h>\n" |
| 24154 | "///\n" |
| 24155 | "/// This intrinsic corresponds to the <c> PSRAW </c> instruction.\n" |
| 24156 | "///\n" |
| 24157 | "/// \\param __m\n" |
| 24158 | "/// A 64-bit integer vector of [4 x i16].\n" |
| 24159 | "/// \\param __count\n" |
| 24160 | "/// A 32-bit integer value.\n" |
| 24161 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the right-shifted\n" |
| 24162 | "/// values.\n" |
| 24163 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24164 | "_mm_srai_pi16(__m64 __m, int __count)\n" |
| 24165 | "{\n" |
| 24166 | " return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);\n" |
| 24167 | "}\n" |
| 24168 | "\n" |
| 24169 | "/// Right-shifts each 32-bit integer element of the first parameter,\n" |
| 24170 | "/// which is a 64-bit integer vector of [2 x i32], by the number of bits\n" |
| 24171 | "/// specified by the second parameter, which is a 64-bit integer.\n" |
| 24172 | "///\n" |
| 24173 | "/// High-order bits are filled with the sign bit of the initial value of each\n" |
| 24174 | "/// 32-bit element. The 32-bit results are packed into a 64-bit integer\n" |
| 24175 | "/// vector of [2 x i32].\n" |
| 24176 | "///\n" |
| 24177 | "/// \\headerfile <x86intrin.h>\n" |
| 24178 | "///\n" |
| 24179 | "/// This intrinsic corresponds to the <c> PSRAD </c> instruction.\n" |
| 24180 | "///\n" |
| 24181 | "/// \\param __m\n" |
| 24182 | "/// A 64-bit integer vector of [2 x i32].\n" |
| 24183 | "/// \\param __count\n" |
| 24184 | "/// A 64-bit integer vector interpreted as a single 64-bit integer.\n" |
| 24185 | "/// \\returns A 64-bit integer vector of [2 x i32] containing the right-shifted\n" |
| 24186 | "/// values.\n" |
| 24187 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24188 | "_mm_sra_pi32(__m64 __m, __m64 __count)\n" |
| 24189 | "{\n" |
| 24190 | " return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);\n" |
| 24191 | "}\n" |
| 24192 | "\n" |
| 24193 | "/// Right-shifts each 32-bit integer element of a 64-bit integer vector\n" |
| 24194 | "/// of [2 x i32] by the number of bits specified by a 32-bit integer.\n" |
| 24195 | "///\n" |
| 24196 | "/// High-order bits are filled with the sign bit of the initial value of each\n" |
| 24197 | "/// 32-bit element. The 32-bit results are packed into a 64-bit integer\n" |
| 24198 | "/// vector of [2 x i32].\n" |
| 24199 | "///\n" |
| 24200 | "/// \\headerfile <x86intrin.h>\n" |
| 24201 | "///\n" |
| 24202 | "/// This intrinsic corresponds to the <c> PSRAD </c> instruction.\n" |
| 24203 | "///\n" |
| 24204 | "/// \\param __m\n" |
| 24205 | "/// A 64-bit integer vector of [2 x i32].\n" |
| 24206 | "/// \\param __count\n" |
| 24207 | "/// A 32-bit integer value.\n" |
| 24208 | "/// \\returns A 64-bit integer vector of [2 x i32] containing the right-shifted\n" |
| 24209 | "/// values.\n" |
| 24210 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24211 | "_mm_srai_pi32(__m64 __m, int __count)\n" |
| 24212 | "{\n" |
| 24213 | " return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);\n" |
| 24214 | "}\n" |
| 24215 | "\n" |
| 24216 | "/// Right-shifts each 16-bit integer element of the first parameter,\n" |
| 24217 | "/// which is a 64-bit integer vector of [4 x i16], by the number of bits\n" |
| 24218 | "/// specified by the second parameter, which is a 64-bit integer.\n" |
| 24219 | "///\n" |
| 24220 | "/// High-order bits are cleared. The 16-bit results are packed into a 64-bit\n" |
| 24221 | "/// integer vector of [4 x i16].\n" |
| 24222 | "///\n" |
| 24223 | "/// \\headerfile <x86intrin.h>\n" |
| 24224 | "///\n" |
| 24225 | "/// This intrinsic corresponds to the <c> PSRLW </c> instruction.\n" |
| 24226 | "///\n" |
| 24227 | "/// \\param __m\n" |
| 24228 | "/// A 64-bit integer vector of [4 x i16].\n" |
| 24229 | "/// \\param __count\n" |
| 24230 | "/// A 64-bit integer vector interpreted as a single 64-bit integer.\n" |
| 24231 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the right-shifted\n" |
| 24232 | "/// values.\n" |
| 24233 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24234 | "_mm_srl_pi16(__m64 __m, __m64 __count)\n" |
| 24235 | "{\n" |
| 24236 | " return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);\n" |
| 24237 | "}\n" |
| 24238 | "\n" |
| 24239 | "/// Right-shifts each 16-bit integer element of a 64-bit integer vector\n" |
| 24240 | "/// of [4 x i16] by the number of bits specified by a 32-bit integer.\n" |
| 24241 | "///\n" |
| 24242 | "/// High-order bits are cleared. The 16-bit results are packed into a 64-bit\n" |
| 24243 | "/// integer vector of [4 x i16].\n" |
| 24244 | "///\n" |
| 24245 | "/// \\headerfile <x86intrin.h>\n" |
| 24246 | "///\n" |
| 24247 | "/// This intrinsic corresponds to the <c> PSRLW </c> instruction.\n" |
| 24248 | "///\n" |
| 24249 | "/// \\param __m\n" |
| 24250 | "/// A 64-bit integer vector of [4 x i16].\n" |
| 24251 | "/// \\param __count\n" |
| 24252 | "/// A 32-bit integer value.\n" |
| 24253 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the right-shifted\n" |
| 24254 | "/// values.\n" |
| 24255 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24256 | "_mm_srli_pi16(__m64 __m, int __count)\n" |
| 24257 | "{\n" |
| 24258 | " return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);\n" |
| 24259 | "}\n" |
| 24260 | "\n" |
| 24261 | "/// Right-shifts each 32-bit integer element of the first parameter,\n" |
| 24262 | "/// which is a 64-bit integer vector of [2 x i32], by the number of bits\n" |
| 24263 | "/// specified by the second parameter, which is a 64-bit integer.\n" |
| 24264 | "///\n" |
| 24265 | "/// High-order bits are cleared. The 32-bit results are packed into a 64-bit\n" |
| 24266 | "/// integer vector of [2 x i32].\n" |
| 24267 | "///\n" |
| 24268 | "/// \\headerfile <x86intrin.h>\n" |
| 24269 | "///\n" |
| 24270 | "/// This intrinsic corresponds to the <c> PSRLD </c> instruction.\n" |
| 24271 | "///\n" |
| 24272 | "/// \\param __m\n" |
| 24273 | "/// A 64-bit integer vector of [2 x i32].\n" |
| 24274 | "/// \\param __count\n" |
| 24275 | "/// A 64-bit integer vector interpreted as a single 64-bit integer.\n" |
| 24276 | "/// \\returns A 64-bit integer vector of [2 x i32] containing the right-shifted\n" |
| 24277 | "/// values.\n" |
| 24278 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24279 | "_mm_srl_pi32(__m64 __m, __m64 __count)\n" |
| 24280 | "{\n" |
| 24281 | " return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);\n" |
| 24282 | "}\n" |
| 24283 | "\n" |
| 24284 | "/// Right-shifts each 32-bit integer element of a 64-bit integer vector\n" |
| 24285 | "/// of [2 x i32] by the number of bits specified by a 32-bit integer.\n" |
| 24286 | "///\n" |
| 24287 | "/// High-order bits are cleared. The 32-bit results are packed into a 64-bit\n" |
| 24288 | "/// integer vector of [2 x i32].\n" |
| 24289 | "///\n" |
| 24290 | "/// \\headerfile <x86intrin.h>\n" |
| 24291 | "///\n" |
| 24292 | "/// This intrinsic corresponds to the <c> PSRLD </c> instruction.\n" |
| 24293 | "///\n" |
| 24294 | "/// \\param __m\n" |
| 24295 | "/// A 64-bit integer vector of [2 x i32].\n" |
| 24296 | "/// \\param __count\n" |
| 24297 | "/// A 32-bit integer value.\n" |
| 24298 | "/// \\returns A 64-bit integer vector of [2 x i32] containing the right-shifted\n" |
| 24299 | "/// values.\n" |
| 24300 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24301 | "_mm_srli_pi32(__m64 __m, int __count)\n" |
| 24302 | "{\n" |
| 24303 | " return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);\n" |
| 24304 | "}\n" |
| 24305 | "\n" |
| 24306 | "/// Right-shifts the first 64-bit integer parameter by the number of bits\n" |
| 24307 | "/// specified by the second 64-bit integer parameter.\n" |
| 24308 | "///\n" |
| 24309 | "/// High-order bits are cleared.\n" |
| 24310 | "///\n" |
| 24311 | "/// \\headerfile <x86intrin.h>\n" |
| 24312 | "///\n" |
| 24313 | "/// This intrinsic corresponds to the <c> PSRLQ </c> instruction.\n" |
| 24314 | "///\n" |
| 24315 | "/// \\param __m\n" |
| 24316 | "/// A 64-bit integer vector interpreted as a single 64-bit integer.\n" |
| 24317 | "/// \\param __count\n" |
| 24318 | "/// A 64-bit integer vector interpreted as a single 64-bit integer.\n" |
| 24319 | "/// \\returns A 64-bit integer vector containing the right-shifted value.\n" |
| 24320 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24321 | "_mm_srl_si64(__m64 __m, __m64 __count)\n" |
| 24322 | "{\n" |
| 24323 | " return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count);\n" |
| 24324 | "}\n" |
| 24325 | "\n" |
| 24326 | "/// Right-shifts the first parameter, which is a 64-bit integer, by the\n" |
| 24327 | "/// number of bits specified by the second parameter, which is a 32-bit\n" |
| 24328 | "/// integer.\n" |
| 24329 | "///\n" |
| 24330 | "/// High-order bits are cleared.\n" |
| 24331 | "///\n" |
| 24332 | "/// \\headerfile <x86intrin.h>\n" |
| 24333 | "///\n" |
| 24334 | "/// This intrinsic corresponds to the <c> PSRLQ </c> instruction.\n" |
| 24335 | "///\n" |
| 24336 | "/// \\param __m\n" |
| 24337 | "/// A 64-bit integer vector interpreted as a single 64-bit integer.\n" |
| 24338 | "/// \\param __count\n" |
| 24339 | "/// A 32-bit integer value.\n" |
| 24340 | "/// \\returns A 64-bit integer vector containing the right-shifted value.\n" |
| 24341 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24342 | "_mm_srli_si64(__m64 __m, int __count)\n" |
| 24343 | "{\n" |
| 24344 | " return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count);\n" |
| 24345 | "}\n" |
| 24346 | "\n" |
| 24347 | "/// Performs a bitwise AND of two 64-bit integer vectors.\n" |
| 24348 | "///\n" |
| 24349 | "/// \\headerfile <x86intrin.h>\n" |
| 24350 | "///\n" |
| 24351 | "/// This intrinsic corresponds to the <c> PAND </c> instruction.\n" |
| 24352 | "///\n" |
| 24353 | "/// \\param __m1\n" |
| 24354 | "/// A 64-bit integer vector.\n" |
| 24355 | "/// \\param __m2\n" |
| 24356 | "/// A 64-bit integer vector.\n" |
| 24357 | "/// \\returns A 64-bit integer vector containing the bitwise AND of both\n" |
| 24358 | "/// parameters.\n" |
| 24359 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24360 | "_mm_and_si64(__m64 __m1, __m64 __m2)\n" |
| 24361 | "{\n" |
| 24362 | " return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2);\n" |
| 24363 | "}\n" |
| 24364 | "\n" |
| 24365 | "/// Performs a bitwise NOT of the first 64-bit integer vector, and then\n" |
| 24366 | "/// performs a bitwise AND of the intermediate result and the second 64-bit\n" |
| 24367 | "/// integer vector.\n" |
| 24368 | "///\n" |
| 24369 | "/// \\headerfile <x86intrin.h>\n" |
| 24370 | "///\n" |
| 24371 | "/// This intrinsic corresponds to the <c> PANDN </c> instruction.\n" |
| 24372 | "///\n" |
| 24373 | "/// \\param __m1\n" |
| 24374 | "/// A 64-bit integer vector. The one's complement of this parameter is used\n" |
| 24375 | "/// in the bitwise AND.\n" |
| 24376 | "/// \\param __m2\n" |
| 24377 | "/// A 64-bit integer vector.\n" |
| 24378 | "/// \\returns A 64-bit integer vector containing the bitwise AND of the second\n" |
| 24379 | "/// parameter and the one's complement of the first parameter.\n" |
| 24380 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24381 | "_mm_andnot_si64(__m64 __m1, __m64 __m2)\n" |
| 24382 | "{\n" |
| 24383 | " return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2);\n" |
| 24384 | "}\n" |
| 24385 | "\n" |
| 24386 | "/// Performs a bitwise OR of two 64-bit integer vectors.\n" |
| 24387 | "///\n" |
| 24388 | "/// \\headerfile <x86intrin.h>\n" |
| 24389 | "///\n" |
| 24390 | "/// This intrinsic corresponds to the <c> POR </c> instruction.\n" |
| 24391 | "///\n" |
| 24392 | "/// \\param __m1\n" |
| 24393 | "/// A 64-bit integer vector.\n" |
| 24394 | "/// \\param __m2\n" |
| 24395 | "/// A 64-bit integer vector.\n" |
| 24396 | "/// \\returns A 64-bit integer vector containing the bitwise OR of both\n" |
| 24397 | "/// parameters.\n" |
| 24398 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24399 | "_mm_or_si64(__m64 __m1, __m64 __m2)\n" |
| 24400 | "{\n" |
| 24401 | " return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2);\n" |
| 24402 | "}\n" |
| 24403 | "\n" |
| 24404 | "/// Performs a bitwise exclusive OR of two 64-bit integer vectors.\n" |
| 24405 | "///\n" |
| 24406 | "/// \\headerfile <x86intrin.h>\n" |
| 24407 | "///\n" |
| 24408 | "/// This intrinsic corresponds to the <c> PXOR </c> instruction.\n" |
| 24409 | "///\n" |
| 24410 | "/// \\param __m1\n" |
| 24411 | "/// A 64-bit integer vector.\n" |
| 24412 | "/// \\param __m2\n" |
| 24413 | "/// A 64-bit integer vector.\n" |
| 24414 | "/// \\returns A 64-bit integer vector containing the bitwise exclusive OR of both\n" |
| 24415 | "/// parameters.\n" |
| 24416 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24417 | "_mm_xor_si64(__m64 __m1, __m64 __m2)\n" |
| 24418 | "{\n" |
| 24419 | " return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2);\n" |
| 24420 | "}\n" |
| 24421 | "\n" |
| 24422 | "/// Compares the 8-bit integer elements of two 64-bit integer vectors of\n" |
| 24423 | "/// [8 x i8] to determine if the element of the first vector is equal to the\n" |
| 24424 | "/// corresponding element of the second vector.\n" |
| 24425 | "///\n" |
| 24426 | "/// The comparison yields 0 for false, 0xFF for true.\n" |
| 24427 | "///\n" |
| 24428 | "/// \\headerfile <x86intrin.h>\n" |
| 24429 | "///\n" |
| 24430 | "/// This intrinsic corresponds to the <c> PCMPEQB </c> instruction.\n" |
| 24431 | "///\n" |
| 24432 | "/// \\param __m1\n" |
| 24433 | "/// A 64-bit integer vector of [8 x i8].\n" |
| 24434 | "/// \\param __m2\n" |
| 24435 | "/// A 64-bit integer vector of [8 x i8].\n" |
| 24436 | "/// \\returns A 64-bit integer vector of [8 x i8] containing the comparison\n" |
| 24437 | "/// results.\n" |
| 24438 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24439 | "_mm_cmpeq_pi8(__m64 __m1, __m64 __m2)\n" |
| 24440 | "{\n" |
| 24441 | " return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);\n" |
| 24442 | "}\n" |
| 24443 | "\n" |
| 24444 | "/// Compares the 16-bit integer elements of two 64-bit integer vectors of\n" |
| 24445 | "/// [4 x i16] to determine if the element of the first vector is equal to the\n" |
| 24446 | "/// corresponding element of the second vector.\n" |
| 24447 | "///\n" |
| 24448 | "/// The comparison yields 0 for false, 0xFFFF for true.\n" |
| 24449 | "///\n" |
| 24450 | "/// \\headerfile <x86intrin.h>\n" |
| 24451 | "///\n" |
| 24452 | "/// This intrinsic corresponds to the <c> PCMPEQW </c> instruction.\n" |
| 24453 | "///\n" |
| 24454 | "/// \\param __m1\n" |
| 24455 | "/// A 64-bit integer vector of [4 x i16].\n" |
| 24456 | "/// \\param __m2\n" |
| 24457 | "/// A 64-bit integer vector of [4 x i16].\n" |
| 24458 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the comparison\n" |
| 24459 | "/// results.\n" |
| 24460 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24461 | "_mm_cmpeq_pi16(__m64 __m1, __m64 __m2)\n" |
| 24462 | "{\n" |
| 24463 | " return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);\n" |
| 24464 | "}\n" |
| 24465 | "\n" |
| 24466 | "/// Compares the 32-bit integer elements of two 64-bit integer vectors of\n" |
| 24467 | "/// [2 x i32] to determine if the element of the first vector is equal to the\n" |
| 24468 | "/// corresponding element of the second vector.\n" |
| 24469 | "///\n" |
| 24470 | "/// The comparison yields 0 for false, 0xFFFFFFFF for true.\n" |
| 24471 | "///\n" |
| 24472 | "/// \\headerfile <x86intrin.h>\n" |
| 24473 | "///\n" |
| 24474 | "/// This intrinsic corresponds to the <c> PCMPEQD </c> instruction.\n" |
| 24475 | "///\n" |
| 24476 | "/// \\param __m1\n" |
| 24477 | "/// A 64-bit integer vector of [2 x i32].\n" |
| 24478 | "/// \\param __m2\n" |
| 24479 | "/// A 64-bit integer vector of [2 x i32].\n" |
| 24480 | "/// \\returns A 64-bit integer vector of [2 x i32] containing the comparison\n" |
| 24481 | "/// results.\n" |
| 24482 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24483 | "_mm_cmpeq_pi32(__m64 __m1, __m64 __m2)\n" |
| 24484 | "{\n" |
| 24485 | " return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);\n" |
| 24486 | "}\n" |
| 24487 | "\n" |
| 24488 | "/// Compares the 8-bit integer elements of two 64-bit integer vectors of\n" |
| 24489 | "/// [8 x i8] to determine if the element of the first vector is greater than\n" |
| 24490 | "/// the corresponding element of the second vector.\n" |
| 24491 | "///\n" |
| 24492 | "/// The comparison yields 0 for false, 0xFF for true.\n" |
| 24493 | "///\n" |
| 24494 | "/// \\headerfile <x86intrin.h>\n" |
| 24495 | "///\n" |
| 24496 | "/// This intrinsic corresponds to the <c> PCMPGTB </c> instruction.\n" |
| 24497 | "///\n" |
| 24498 | "/// \\param __m1\n" |
| 24499 | "/// A 64-bit integer vector of [8 x i8].\n" |
| 24500 | "/// \\param __m2\n" |
| 24501 | "/// A 64-bit integer vector of [8 x i8].\n" |
| 24502 | "/// \\returns A 64-bit integer vector of [8 x i8] containing the comparison\n" |
| 24503 | "/// results.\n" |
| 24504 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24505 | "_mm_cmpgt_pi8(__m64 __m1, __m64 __m2)\n" |
| 24506 | "{\n" |
| 24507 | " return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);\n" |
| 24508 | "}\n" |
| 24509 | "\n" |
| 24510 | "/// Compares the 16-bit integer elements of two 64-bit integer vectors of\n" |
| 24511 | "/// [4 x i16] to determine if the element of the first vector is greater than\n" |
| 24512 | "/// the corresponding element of the second vector.\n" |
| 24513 | "///\n" |
| 24514 | "/// The comparison yields 0 for false, 0xFFFF for true.\n" |
| 24515 | "///\n" |
| 24516 | "/// \\headerfile <x86intrin.h>\n" |
| 24517 | "///\n" |
| 24518 | "/// This intrinsic corresponds to the <c> PCMPGTW </c> instruction.\n" |
| 24519 | "///\n" |
| 24520 | "/// \\param __m1\n" |
| 24521 | "/// A 64-bit integer vector of [4 x i16].\n" |
| 24522 | "/// \\param __m2\n" |
| 24523 | "/// A 64-bit integer vector of [4 x i16].\n" |
| 24524 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the comparison\n" |
| 24525 | "/// results.\n" |
| 24526 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24527 | "_mm_cmpgt_pi16(__m64 __m1, __m64 __m2)\n" |
| 24528 | "{\n" |
| 24529 | " return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);\n" |
| 24530 | "}\n" |
| 24531 | "\n" |
| 24532 | "/// Compares the 32-bit integer elements of two 64-bit integer vectors of\n" |
| 24533 | "/// [2 x i32] to determine if the element of the first vector is greater than\n" |
| 24534 | "/// the corresponding element of the second vector.\n" |
| 24535 | "///\n" |
| 24536 | "/// The comparison yields 0 for false, 0xFFFFFFFF for true.\n" |
| 24537 | "///\n" |
| 24538 | "/// \\headerfile <x86intrin.h>\n" |
| 24539 | "///\n" |
| 24540 | "/// This intrinsic corresponds to the <c> PCMPGTD </c> instruction.\n" |
| 24541 | "///\n" |
| 24542 | "/// \\param __m1\n" |
| 24543 | "/// A 64-bit integer vector of [2 x i32].\n" |
| 24544 | "/// \\param __m2\n" |
| 24545 | "/// A 64-bit integer vector of [2 x i32].\n" |
| 24546 | "/// \\returns A 64-bit integer vector of [2 x i32] containing the comparison\n" |
| 24547 | "/// results.\n" |
| 24548 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24549 | "_mm_cmpgt_pi32(__m64 __m1, __m64 __m2)\n" |
| 24550 | "{\n" |
| 24551 | " return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);\n" |
| 24552 | "}\n" |
| 24553 | "\n" |
| 24554 | "/// Constructs a 64-bit integer vector initialized to zero.\n" |
| 24555 | "///\n" |
| 24556 | "/// \\headerfile <x86intrin.h>\n" |
| 24557 | "///\n" |
| 24558 | "/// This intrinsic corresponds to the <c> PXOR </c> instruction.\n" |
| 24559 | "///\n" |
| 24560 | "/// \\returns An initialized 64-bit integer vector with all elements set to zero.\n" |
| 24561 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24562 | "_mm_setzero_si64(void)\n" |
| 24563 | "{\n" |
| 24564 | " return __extension__ (__m64){ 0LL };\n" |
| 24565 | "}\n" |
| 24566 | "\n" |
| 24567 | "/// Constructs a 64-bit integer vector initialized with the specified\n" |
| 24568 | "/// 32-bit integer values.\n" |
| 24569 | "///\n" |
| 24570 | "/// \\headerfile <x86intrin.h>\n" |
| 24571 | "///\n" |
| 24572 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 24573 | "/// instruction.\n" |
| 24574 | "///\n" |
| 24575 | "/// \\param __i1\n" |
| 24576 | "/// A 32-bit integer value used to initialize the upper 32 bits of the\n" |
| 24577 | "/// result.\n" |
| 24578 | "/// \\param __i0\n" |
| 24579 | "/// A 32-bit integer value used to initialize the lower 32 bits of the\n" |
| 24580 | "/// result.\n" |
| 24581 | "/// \\returns An initialized 64-bit integer vector.\n" |
| 24582 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24583 | "_mm_set_pi32(int __i1, int __i0)\n" |
| 24584 | "{\n" |
| 24585 | " return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);\n" |
| 24586 | "}\n" |
| 24587 | "\n" |
| 24588 | "/// Constructs a 64-bit integer vector initialized with the specified\n" |
| 24589 | "/// 16-bit integer values.\n" |
| 24590 | "///\n" |
| 24591 | "/// \\headerfile <x86intrin.h>\n" |
| 24592 | "///\n" |
| 24593 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 24594 | "/// instruction.\n" |
| 24595 | "///\n" |
| 24596 | "/// \\param __s3\n" |
| 24597 | "/// A 16-bit integer value used to initialize bits [63:48] of the result.\n" |
| 24598 | "/// \\param __s2\n" |
| 24599 | "/// A 16-bit integer value used to initialize bits [47:32] of the result.\n" |
| 24600 | "/// \\param __s1\n" |
| 24601 | "/// A 16-bit integer value used to initialize bits [31:16] of the result.\n" |
| 24602 | "/// \\param __s0\n" |
| 24603 | "/// A 16-bit integer value used to initialize bits [15:0] of the result.\n" |
| 24604 | "/// \\returns An initialized 64-bit integer vector.\n" |
| 24605 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24606 | "_mm_set_pi16(short __s3, short __s2, short __s1, short __s0)\n" |
| 24607 | "{\n" |
| 24608 | " return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3);\n" |
| 24609 | "}\n" |
| 24610 | "\n" |
| 24611 | "/// Constructs a 64-bit integer vector initialized with the specified\n" |
| 24612 | "/// 8-bit integer values.\n" |
| 24613 | "///\n" |
| 24614 | "/// \\headerfile <x86intrin.h>\n" |
| 24615 | "///\n" |
| 24616 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 24617 | "/// instruction.\n" |
| 24618 | "///\n" |
| 24619 | "/// \\param __b7\n" |
| 24620 | "/// An 8-bit integer value used to initialize bits [63:56] of the result.\n" |
| 24621 | "/// \\param __b6\n" |
| 24622 | "/// An 8-bit integer value used to initialize bits [55:48] of the result.\n" |
| 24623 | "/// \\param __b5\n" |
| 24624 | "/// An 8-bit integer value used to initialize bits [47:40] of the result.\n" |
| 24625 | "/// \\param __b4\n" |
| 24626 | "/// An 8-bit integer value used to initialize bits [39:32] of the result.\n" |
| 24627 | "/// \\param __b3\n" |
| 24628 | "/// An 8-bit integer value used to initialize bits [31:24] of the result.\n" |
| 24629 | "/// \\param __b2\n" |
| 24630 | "/// An 8-bit integer value used to initialize bits [23:16] of the result.\n" |
| 24631 | "/// \\param __b1\n" |
| 24632 | "/// An 8-bit integer value used to initialize bits [15:8] of the result.\n" |
| 24633 | "/// \\param __b0\n" |
| 24634 | "/// An 8-bit integer value used to initialize bits [7:0] of the result.\n" |
| 24635 | "/// \\returns An initialized 64-bit integer vector.\n" |
| 24636 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24637 | "_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,\n" |
| 24638 | " char __b1, char __b0)\n" |
| 24639 | "{\n" |
| 24640 | " return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3,\n" |
| 24641 | " __b4, __b5, __b6, __b7);\n" |
| 24642 | "}\n" |
| 24643 | "\n" |
| 24644 | "/// Constructs a 64-bit integer vector of [2 x i32], with each of the\n" |
| 24645 | "/// 32-bit integer vector elements set to the specified 32-bit integer\n" |
| 24646 | "/// value.\n" |
| 24647 | "///\n" |
| 24648 | "/// \\headerfile <x86intrin.h>\n" |
| 24649 | "///\n" |
| 24650 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 24651 | "/// instruction.\n" |
| 24652 | "///\n" |
| 24653 | "/// \\param __i\n" |
| 24654 | "/// A 32-bit integer value used to initialize each vector element of the\n" |
| 24655 | "/// result.\n" |
| 24656 | "/// \\returns An initialized 64-bit integer vector of [2 x i32].\n" |
| 24657 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24658 | "_mm_set1_pi32(int __i)\n" |
| 24659 | "{\n" |
| 24660 | " return _mm_set_pi32(__i, __i);\n" |
| 24661 | "}\n" |
| 24662 | "\n" |
| 24663 | "/// Constructs a 64-bit integer vector of [4 x i16], with each of the\n" |
| 24664 | "/// 16-bit integer vector elements set to the specified 16-bit integer\n" |
| 24665 | "/// value.\n" |
| 24666 | "///\n" |
| 24667 | "/// \\headerfile <x86intrin.h>\n" |
| 24668 | "///\n" |
| 24669 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 24670 | "/// instruction.\n" |
| 24671 | "///\n" |
| 24672 | "/// \\param __w\n" |
| 24673 | "/// A 16-bit integer value used to initialize each vector element of the\n" |
| 24674 | "/// result.\n" |
| 24675 | "/// \\returns An initialized 64-bit integer vector of [4 x i16].\n" |
| 24676 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24677 | "_mm_set1_pi16(short __w)\n" |
| 24678 | "{\n" |
| 24679 | " return _mm_set_pi16(__w, __w, __w, __w);\n" |
| 24680 | "}\n" |
| 24681 | "\n" |
| 24682 | "/// Constructs a 64-bit integer vector of [8 x i8], with each of the\n" |
| 24683 | "/// 8-bit integer vector elements set to the specified 8-bit integer value.\n" |
| 24684 | "///\n" |
| 24685 | "/// \\headerfile <x86intrin.h>\n" |
| 24686 | "///\n" |
| 24687 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 24688 | "/// instruction.\n" |
| 24689 | "///\n" |
| 24690 | "/// \\param __b\n" |
| 24691 | "/// An 8-bit integer value used to initialize each vector element of the\n" |
| 24692 | "/// result.\n" |
| 24693 | "/// \\returns An initialized 64-bit integer vector of [8 x i8].\n" |
| 24694 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24695 | "_mm_set1_pi8(char __b)\n" |
| 24696 | "{\n" |
| 24697 | " return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);\n" |
| 24698 | "}\n" |
| 24699 | "\n" |
| 24700 | "/// Constructs a 64-bit integer vector, initialized in reverse order with\n" |
| 24701 | "/// the specified 32-bit integer values.\n" |
| 24702 | "///\n" |
| 24703 | "/// \\headerfile <x86intrin.h>\n" |
| 24704 | "///\n" |
| 24705 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 24706 | "/// instruction.\n" |
| 24707 | "///\n" |
| 24708 | "/// \\param __i0\n" |
| 24709 | "/// A 32-bit integer value used to initialize the lower 32 bits of the\n" |
| 24710 | "/// result.\n" |
| 24711 | "/// \\param __i1\n" |
| 24712 | "/// A 32-bit integer value used to initialize the upper 32 bits of the\n" |
| 24713 | "/// result.\n" |
| 24714 | "/// \\returns An initialized 64-bit integer vector.\n" |
| 24715 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24716 | "_mm_setr_pi32(int __i0, int __i1)\n" |
| 24717 | "{\n" |
| 24718 | " return _mm_set_pi32(__i1, __i0);\n" |
| 24719 | "}\n" |
| 24720 | "\n" |
| 24721 | "/// Constructs a 64-bit integer vector, initialized in reverse order with\n" |
| 24722 | "/// the specified 16-bit integer values.\n" |
| 24723 | "///\n" |
| 24724 | "/// \\headerfile <x86intrin.h>\n" |
| 24725 | "///\n" |
| 24726 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 24727 | "/// instruction.\n" |
| 24728 | "///\n" |
| 24729 | "/// \\param __w0\n" |
| 24730 | "/// A 16-bit integer value used to initialize bits [15:0] of the result.\n" |
| 24731 | "/// \\param __w1\n" |
| 24732 | "/// A 16-bit integer value used to initialize bits [31:16] of the result.\n" |
| 24733 | "/// \\param __w2\n" |
| 24734 | "/// A 16-bit integer value used to initialize bits [47:32] of the result.\n" |
| 24735 | "/// \\param __w3\n" |
| 24736 | "/// A 16-bit integer value used to initialize bits [63:48] of the result.\n" |
| 24737 | "/// \\returns An initialized 64-bit integer vector.\n" |
| 24738 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24739 | "_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)\n" |
| 24740 | "{\n" |
| 24741 | " return _mm_set_pi16(__w3, __w2, __w1, __w0);\n" |
| 24742 | "}\n" |
| 24743 | "\n" |
| 24744 | "/// Constructs a 64-bit integer vector, initialized in reverse order with\n" |
| 24745 | "/// the specified 8-bit integer values.\n" |
| 24746 | "///\n" |
| 24747 | "/// \\headerfile <x86intrin.h>\n" |
| 24748 | "///\n" |
| 24749 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 24750 | "/// instruction.\n" |
| 24751 | "///\n" |
| 24752 | "/// \\param __b0\n" |
| 24753 | "/// An 8-bit integer value used to initialize bits [7:0] of the result.\n" |
| 24754 | "/// \\param __b1\n" |
| 24755 | "/// An 8-bit integer value used to initialize bits [15:8] of the result.\n" |
| 24756 | "/// \\param __b2\n" |
| 24757 | "/// An 8-bit integer value used to initialize bits [23:16] of the result.\n" |
| 24758 | "/// \\param __b3\n" |
| 24759 | "/// An 8-bit integer value used to initialize bits [31:24] of the result.\n" |
| 24760 | "/// \\param __b4\n" |
| 24761 | "/// An 8-bit integer value used to initialize bits [39:32] of the result.\n" |
| 24762 | "/// \\param __b5\n" |
| 24763 | "/// An 8-bit integer value used to initialize bits [47:40] of the result.\n" |
| 24764 | "/// \\param __b6\n" |
| 24765 | "/// An 8-bit integer value used to initialize bits [55:48] of the result.\n" |
| 24766 | "/// \\param __b7\n" |
| 24767 | "/// An 8-bit integer value used to initialize bits [63:56] of the result.\n" |
| 24768 | "/// \\returns An initialized 64-bit integer vector.\n" |
| 24769 | "static __inline__ __m64 __DEFAULT_FN_ATTRS\n" |
| 24770 | "_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,\n" |
| 24771 | " char __b6, char __b7)\n" |
| 24772 | "{\n" |
| 24773 | " return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);\n" |
| 24774 | "}\n" |
| 24775 | "\n" |
| 24776 | "#undef __DEFAULT_FN_ATTRS\n" |
| 24777 | "\n" |
| 24778 | "/* Aliases for compatibility. */\n" |
| 24779 | "#define _m_empty _mm_empty\n" |
| 24780 | "#define _m_from_int _mm_cvtsi32_si64\n" |
| 24781 | "#define _m_from_int64 _mm_cvtsi64_m64\n" |
| 24782 | "#define _m_to_int _mm_cvtsi64_si32\n" |
| 24783 | "#define _m_to_int64 _mm_cvtm64_si64\n" |
| 24784 | "#define _m_packsswb _mm_packs_pi16\n" |
| 24785 | "#define _m_packssdw _mm_packs_pi32\n" |
| 24786 | "#define _m_packuswb _mm_packs_pu16\n" |
| 24787 | "#define _m_punpckhbw _mm_unpackhi_pi8\n" |
| 24788 | "#define _m_punpckhwd _mm_unpackhi_pi16\n" |
| 24789 | "#define _m_punpckhdq _mm_unpackhi_pi32\n" |
| 24790 | "#define _m_punpcklbw _mm_unpacklo_pi8\n" |
| 24791 | "#define _m_punpcklwd _mm_unpacklo_pi16\n" |
| 24792 | "#define _m_punpckldq _mm_unpacklo_pi32\n" |
| 24793 | "#define _m_paddb _mm_add_pi8\n" |
| 24794 | "#define _m_paddw _mm_add_pi16\n" |
| 24795 | "#define _m_paddd _mm_add_pi32\n" |
| 24796 | "#define _m_paddsb _mm_adds_pi8\n" |
| 24797 | "#define _m_paddsw _mm_adds_pi16\n" |
| 24798 | "#define _m_paddusb _mm_adds_pu8\n" |
| 24799 | "#define _m_paddusw _mm_adds_pu16\n" |
| 24800 | "#define _m_psubb _mm_sub_pi8\n" |
| 24801 | "#define _m_psubw _mm_sub_pi16\n" |
| 24802 | "#define _m_psubd _mm_sub_pi32\n" |
| 24803 | "#define _m_psubsb _mm_subs_pi8\n" |
| 24804 | "#define _m_psubsw _mm_subs_pi16\n" |
| 24805 | "#define _m_psubusb _mm_subs_pu8\n" |
| 24806 | "#define _m_psubusw _mm_subs_pu16\n" |
| 24807 | "#define _m_pmaddwd _mm_madd_pi16\n" |
| 24808 | "#define _m_pmulhw _mm_mulhi_pi16\n" |
| 24809 | "#define _m_pmullw _mm_mullo_pi16\n" |
| 24810 | "#define _m_psllw _mm_sll_pi16\n" |
| 24811 | "#define _m_psllwi _mm_slli_pi16\n" |
| 24812 | "#define _m_pslld _mm_sll_pi32\n" |
| 24813 | "#define _m_pslldi _mm_slli_pi32\n" |
| 24814 | "#define _m_psllq _mm_sll_si64\n" |
| 24815 | "#define _m_psllqi _mm_slli_si64\n" |
| 24816 | "#define _m_psraw _mm_sra_pi16\n" |
| 24817 | "#define _m_psrawi _mm_srai_pi16\n" |
| 24818 | "#define _m_psrad _mm_sra_pi32\n" |
| 24819 | "#define _m_psradi _mm_srai_pi32\n" |
| 24820 | "#define _m_psrlw _mm_srl_pi16\n" |
| 24821 | "#define _m_psrlwi _mm_srli_pi16\n" |
| 24822 | "#define _m_psrld _mm_srl_pi32\n" |
| 24823 | "#define _m_psrldi _mm_srli_pi32\n" |
| 24824 | "#define _m_psrlq _mm_srl_si64\n" |
| 24825 | "#define _m_psrlqi _mm_srli_si64\n" |
| 24826 | "#define _m_pand _mm_and_si64\n" |
| 24827 | "#define _m_pandn _mm_andnot_si64\n" |
| 24828 | "#define _m_por _mm_or_si64\n" |
| 24829 | "#define _m_pxor _mm_xor_si64\n" |
| 24830 | "#define _m_pcmpeqb _mm_cmpeq_pi8\n" |
| 24831 | "#define _m_pcmpeqw _mm_cmpeq_pi16\n" |
| 24832 | "#define _m_pcmpeqd _mm_cmpeq_pi32\n" |
| 24833 | "#define _m_pcmpgtb _mm_cmpgt_pi8\n" |
| 24834 | "#define _m_pcmpgtw _mm_cmpgt_pi16\n" |
| 24835 | "#define _m_pcmpgtd _mm_cmpgt_pi32\n" |
| 24836 | "\n" |
| 24837 | "#endif /* __MMINTRIN_H */\n" |
| 24838 | "\n" |
| 24839 | "" } , |
| 24840 | { "/builtins/movdirintrin.h" , "/*===------------------------- movdirintrin.h ------------------------------===\n" |
| 24841 | " *\n" |
| 24842 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 24843 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 24844 | " * in the Software without restriction, including without limitation the rights\n" |
| 24845 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 24846 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 24847 | " * furnished to do so, subject to the following conditions:\n" |
| 24848 | " *\n" |
| 24849 | " * The above copyright notice and this permission notice shall be included in\n" |
| 24850 | " * all copies or substantial portions of the Software.\n" |
| 24851 | " *\n" |
| 24852 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 24853 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 24854 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 24855 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 24856 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 24857 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 24858 | " * THE SOFTWARE.\n" |
| 24859 | " *\n" |
| 24860 | " *===-----------------------------------------------------------------------===\n" |
| 24861 | " */\n" |
| 24862 | "#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n" |
| 24863 | "#error \"Never use <movdirintrin.h> directly; include <x86intrin.h> instead.\"\n" |
| 24864 | "#endif\n" |
| 24865 | "\n" |
| 24866 | "#ifndef _MOVDIRINTRIN_H\n" |
| 24867 | "#define _MOVDIRINTRIN_H\n" |
| 24868 | "\n" |
| 24869 | "/* Move doubleword as direct store */\n" |
| 24870 | "static __inline__ void\n" |
| 24871 | "__attribute__((__always_inline__, __nodebug__, __target__(\"movdiri\")))\n" |
| 24872 | "_directstoreu_u32 (void *__dst, unsigned int __value)\n" |
| 24873 | "{\n" |
| 24874 | " __builtin_ia32_directstore_u32((unsigned int *)__dst, (unsigned int)__value);\n" |
| 24875 | "}\n" |
| 24876 | "\n" |
| 24877 | "#ifdef __x86_64__\n" |
| 24878 | "\n" |
| 24879 | "/* Move quadword as direct store */\n" |
| 24880 | "static __inline__ void\n" |
| 24881 | "__attribute__((__always_inline__, __nodebug__, __target__(\"movdiri\")))\n" |
| 24882 | "_directstoreu_u64 (void *__dst, unsigned long __value)\n" |
| 24883 | "{\n" |
| 24884 | " __builtin_ia32_directstore_u64((unsigned long *)__dst, __value);\n" |
| 24885 | "}\n" |
| 24886 | "\n" |
| 24887 | "#endif /* __x86_64__ */\n" |
| 24888 | "\n" |
| 24889 | "/*\n" |
| 24890 | " * movdir64b - Move 64 bytes as direct store.\n" |
| 24891 | " * The destination must be 64 byte aligned, and the store is atomic.\n" |
| 24892 | " * The source address has no alignment requirement, and the load from\n" |
| 24893 | " * the source address is not atomic.\n" |
| 24894 | " */\n" |
| 24895 | "static __inline__ void\n" |
| 24896 | "__attribute__((__always_inline__, __nodebug__, __target__(\"movdir64b\")))\n" |
| 24897 | "_movdir64b (void *__dst __attribute__((align_value(64))), const void *__src)\n" |
| 24898 | "{\n" |
| 24899 | " __builtin_ia32_movdir64b(__dst, __src);\n" |
| 24900 | "}\n" |
| 24901 | "\n" |
| 24902 | "#endif /* _MOVDIRINTRIN_H */\n" |
| 24903 | "" } , |
| 24904 | { "/builtins/msa.h" , "/*===---- msa.h - MIPS MSA intrinsics --------------------------------------===\n" |
| 24905 | " *\n" |
| 24906 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 24907 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 24908 | " * in the Software without restriction, including without limitation the rights\n" |
| 24909 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 24910 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 24911 | " * furnished to do so, subject to the following conditions:\n" |
| 24912 | " *\n" |
| 24913 | " * The above copyright notice and this permission notice shall be included in\n" |
| 24914 | " * all copies or substantial portions of the Software.\n" |
| 24915 | " *\n" |
| 24916 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 24917 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 24918 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 24919 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 24920 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 24921 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 24922 | " * THE SOFTWARE.\n" |
| 24923 | " *\n" |
| 24924 | " *===-----------------------------------------------------------------------===\n" |
| 24925 | " */\n" |
| 24926 | "\n" |
| 24927 | "#ifndef _MSA_H\n" |
| 24928 | "#define _MSA_H 1\n" |
| 24929 | "\n" |
| 24930 | "#if defined(__mips_msa)\n" |
| 24931 | "typedef signed char v16i8 __attribute__((vector_size(16), aligned(16)));\n" |
| 24932 | "typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1)));\n" |
| 24933 | "typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16)));\n" |
| 24934 | "typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1)));\n" |
| 24935 | "typedef short v8i16 __attribute__((vector_size(16), aligned(16)));\n" |
| 24936 | "typedef short v8i16_h __attribute__((vector_size(16), aligned(2)));\n" |
| 24937 | "typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16)));\n" |
| 24938 | "typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2)));\n" |
| 24939 | "typedef int v4i32 __attribute__((vector_size(16), aligned(16)));\n" |
| 24940 | "typedef int v4i32_w __attribute__((vector_size(16), aligned(4)));\n" |
| 24941 | "typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16)));\n" |
| 24942 | "typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4)));\n" |
| 24943 | "typedef long long v2i64 __attribute__((vector_size(16), aligned(16)));\n" |
| 24944 | "typedef long long v2i64_d __attribute__((vector_size(16), aligned(8)));\n" |
| 24945 | "typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16)));\n" |
| 24946 | "typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8)));\n" |
| 24947 | "typedef float v4f32 __attribute__((vector_size(16), aligned(16)));\n" |
| 24948 | "typedef float v4f32_w __attribute__((vector_size(16), aligned(4)));\n" |
| 24949 | "typedef double v2f64 __attribute__ ((vector_size(16), aligned(16)));\n" |
| 24950 | "typedef double v2f64_d __attribute__ ((vector_size(16), aligned(8)));\n" |
| 24951 | "\n" |
| 24952 | "#define __msa_sll_b __builtin_msa_sll_b\n" |
| 24953 | "#define __msa_sll_h __builtin_msa_sll_h\n" |
| 24954 | "#define __msa_sll_w __builtin_msa_sll_w\n" |
| 24955 | "#define __msa_sll_d __builtin_msa_sll_d\n" |
| 24956 | "#define __msa_slli_b __builtin_msa_slli_b\n" |
| 24957 | "#define __msa_slli_h __builtin_msa_slli_h\n" |
| 24958 | "#define __msa_slli_w __builtin_msa_slli_w\n" |
| 24959 | "#define __msa_slli_d __builtin_msa_slli_d\n" |
| 24960 | "#define __msa_sra_b __builtin_msa_sra_b\n" |
| 24961 | "#define __msa_sra_h __builtin_msa_sra_h\n" |
| 24962 | "#define __msa_sra_w __builtin_msa_sra_w\n" |
| 24963 | "#define __msa_sra_d __builtin_msa_sra_d\n" |
| 24964 | "#define __msa_srai_b __builtin_msa_srai_b\n" |
| 24965 | "#define __msa_srai_h __builtin_msa_srai_h\n" |
| 24966 | "#define __msa_srai_w __builtin_msa_srai_w\n" |
| 24967 | "#define __msa_srai_d __builtin_msa_srai_d\n" |
| 24968 | "#define __msa_srar_b __builtin_msa_srar_b\n" |
| 24969 | "#define __msa_srar_h __builtin_msa_srar_h\n" |
| 24970 | "#define __msa_srar_w __builtin_msa_srar_w\n" |
| 24971 | "#define __msa_srar_d __builtin_msa_srar_d\n" |
| 24972 | "#define __msa_srari_b __builtin_msa_srari_b\n" |
| 24973 | "#define __msa_srari_h __builtin_msa_srari_h\n" |
| 24974 | "#define __msa_srari_w __builtin_msa_srari_w\n" |
| 24975 | "#define __msa_srari_d __builtin_msa_srari_d\n" |
| 24976 | "#define __msa_srl_b __builtin_msa_srl_b\n" |
| 24977 | "#define __msa_srl_h __builtin_msa_srl_h\n" |
| 24978 | "#define __msa_srl_w __builtin_msa_srl_w\n" |
| 24979 | "#define __msa_srl_d __builtin_msa_srl_d\n" |
| 24980 | "#define __msa_srli_b __builtin_msa_srli_b\n" |
| 24981 | "#define __msa_srli_h __builtin_msa_srli_h\n" |
| 24982 | "#define __msa_srli_w __builtin_msa_srli_w\n" |
| 24983 | "#define __msa_srli_d __builtin_msa_srli_d\n" |
| 24984 | "#define __msa_srlr_b __builtin_msa_srlr_b\n" |
| 24985 | "#define __msa_srlr_h __builtin_msa_srlr_h\n" |
| 24986 | "#define __msa_srlr_w __builtin_msa_srlr_w\n" |
| 24987 | "#define __msa_srlr_d __builtin_msa_srlr_d\n" |
| 24988 | "#define __msa_srlri_b __builtin_msa_srlri_b\n" |
| 24989 | "#define __msa_srlri_h __builtin_msa_srlri_h\n" |
| 24990 | "#define __msa_srlri_w __builtin_msa_srlri_w\n" |
| 24991 | "#define __msa_srlri_d __builtin_msa_srlri_d\n" |
| 24992 | "#define __msa_bclr_b __builtin_msa_bclr_b\n" |
| 24993 | "#define __msa_bclr_h __builtin_msa_bclr_h\n" |
| 24994 | "#define __msa_bclr_w __builtin_msa_bclr_w\n" |
| 24995 | "#define __msa_bclr_d __builtin_msa_bclr_d\n" |
| 24996 | "#define __msa_bclri_b __builtin_msa_bclri_b\n" |
| 24997 | "#define __msa_bclri_h __builtin_msa_bclri_h\n" |
| 24998 | "#define __msa_bclri_w __builtin_msa_bclri_w\n" |
| 24999 | "#define __msa_bclri_d __builtin_msa_bclri_d\n" |
| 25000 | "#define __msa_bset_b __builtin_msa_bset_b\n" |
| 25001 | "#define __msa_bset_h __builtin_msa_bset_h\n" |
| 25002 | "#define __msa_bset_w __builtin_msa_bset_w\n" |
| 25003 | "#define __msa_bset_d __builtin_msa_bset_d\n" |
| 25004 | "#define __msa_bseti_b __builtin_msa_bseti_b\n" |
| 25005 | "#define __msa_bseti_h __builtin_msa_bseti_h\n" |
| 25006 | "#define __msa_bseti_w __builtin_msa_bseti_w\n" |
| 25007 | "#define __msa_bseti_d __builtin_msa_bseti_d\n" |
| 25008 | "#define __msa_bneg_b __builtin_msa_bneg_b\n" |
| 25009 | "#define __msa_bneg_h __builtin_msa_bneg_h\n" |
| 25010 | "#define __msa_bneg_w __builtin_msa_bneg_w\n" |
| 25011 | "#define __msa_bneg_d __builtin_msa_bneg_d\n" |
| 25012 | "#define __msa_bnegi_b __builtin_msa_bnegi_b\n" |
| 25013 | "#define __msa_bnegi_h __builtin_msa_bnegi_h\n" |
| 25014 | "#define __msa_bnegi_w __builtin_msa_bnegi_w\n" |
| 25015 | "#define __msa_bnegi_d __builtin_msa_bnegi_d\n" |
| 25016 | "#define __msa_binsl_b __builtin_msa_binsl_b\n" |
| 25017 | "#define __msa_binsl_h __builtin_msa_binsl_h\n" |
| 25018 | "#define __msa_binsl_w __builtin_msa_binsl_w\n" |
| 25019 | "#define __msa_binsl_d __builtin_msa_binsl_d\n" |
| 25020 | "#define __msa_binsli_b __builtin_msa_binsli_b\n" |
| 25021 | "#define __msa_binsli_h __builtin_msa_binsli_h\n" |
| 25022 | "#define __msa_binsli_w __builtin_msa_binsli_w\n" |
| 25023 | "#define __msa_binsli_d __builtin_msa_binsli_d\n" |
| 25024 | "#define __msa_binsr_b __builtin_msa_binsr_b\n" |
| 25025 | "#define __msa_binsr_h __builtin_msa_binsr_h\n" |
| 25026 | "#define __msa_binsr_w __builtin_msa_binsr_w\n" |
| 25027 | "#define __msa_binsr_d __builtin_msa_binsr_d\n" |
| 25028 | "#define __msa_binsri_b __builtin_msa_binsri_b\n" |
| 25029 | "#define __msa_binsri_h __builtin_msa_binsri_h\n" |
| 25030 | "#define __msa_binsri_w __builtin_msa_binsri_w\n" |
| 25031 | "#define __msa_binsri_d __builtin_msa_binsri_d\n" |
| 25032 | "#define __msa_addv_b __builtin_msa_addv_b\n" |
| 25033 | "#define __msa_addv_h __builtin_msa_addv_h\n" |
| 25034 | "#define __msa_addv_w __builtin_msa_addv_w\n" |
| 25035 | "#define __msa_addv_d __builtin_msa_addv_d\n" |
| 25036 | "#define __msa_addvi_b __builtin_msa_addvi_b\n" |
| 25037 | "#define __msa_addvi_h __builtin_msa_addvi_h\n" |
| 25038 | "#define __msa_addvi_w __builtin_msa_addvi_w\n" |
| 25039 | "#define __msa_addvi_d __builtin_msa_addvi_d\n" |
| 25040 | "#define __msa_subv_b __builtin_msa_subv_b\n" |
| 25041 | "#define __msa_subv_h __builtin_msa_subv_h\n" |
| 25042 | "#define __msa_subv_w __builtin_msa_subv_w\n" |
| 25043 | "#define __msa_subv_d __builtin_msa_subv_d\n" |
| 25044 | "#define __msa_subvi_b __builtin_msa_subvi_b\n" |
| 25045 | "#define __msa_subvi_h __builtin_msa_subvi_h\n" |
| 25046 | "#define __msa_subvi_w __builtin_msa_subvi_w\n" |
| 25047 | "#define __msa_subvi_d __builtin_msa_subvi_d\n" |
| 25048 | "#define __msa_max_s_b __builtin_msa_max_s_b\n" |
| 25049 | "#define __msa_max_s_h __builtin_msa_max_s_h\n" |
| 25050 | "#define __msa_max_s_w __builtin_msa_max_s_w\n" |
| 25051 | "#define __msa_max_s_d __builtin_msa_max_s_d\n" |
| 25052 | "#define __msa_maxi_s_b __builtin_msa_maxi_s_b\n" |
| 25053 | "#define __msa_maxi_s_h __builtin_msa_maxi_s_h\n" |
| 25054 | "#define __msa_maxi_s_w __builtin_msa_maxi_s_w\n" |
| 25055 | "#define __msa_maxi_s_d __builtin_msa_maxi_s_d\n" |
| 25056 | "#define __msa_max_u_b __builtin_msa_max_u_b\n" |
| 25057 | "#define __msa_max_u_h __builtin_msa_max_u_h\n" |
| 25058 | "#define __msa_max_u_w __builtin_msa_max_u_w\n" |
| 25059 | "#define __msa_max_u_d __builtin_msa_max_u_d\n" |
| 25060 | "#define __msa_maxi_u_b __builtin_msa_maxi_u_b\n" |
| 25061 | "#define __msa_maxi_u_h __builtin_msa_maxi_u_h\n" |
| 25062 | "#define __msa_maxi_u_w __builtin_msa_maxi_u_w\n" |
| 25063 | "#define __msa_maxi_u_d __builtin_msa_maxi_u_d\n" |
| 25064 | "#define __msa_min_s_b __builtin_msa_min_s_b\n" |
| 25065 | "#define __msa_min_s_h __builtin_msa_min_s_h\n" |
| 25066 | "#define __msa_min_s_w __builtin_msa_min_s_w\n" |
| 25067 | "#define __msa_min_s_d __builtin_msa_min_s_d\n" |
| 25068 | "#define __msa_mini_s_b __builtin_msa_mini_s_b\n" |
| 25069 | "#define __msa_mini_s_h __builtin_msa_mini_s_h\n" |
| 25070 | "#define __msa_mini_s_w __builtin_msa_mini_s_w\n" |
| 25071 | "#define __msa_mini_s_d __builtin_msa_mini_s_d\n" |
| 25072 | "#define __msa_min_u_b __builtin_msa_min_u_b\n" |
| 25073 | "#define __msa_min_u_h __builtin_msa_min_u_h\n" |
| 25074 | "#define __msa_min_u_w __builtin_msa_min_u_w\n" |
| 25075 | "#define __msa_min_u_d __builtin_msa_min_u_d\n" |
| 25076 | "#define __msa_mini_u_b __builtin_msa_mini_u_b\n" |
| 25077 | "#define __msa_mini_u_h __builtin_msa_mini_u_h\n" |
| 25078 | "#define __msa_mini_u_w __builtin_msa_mini_u_w\n" |
| 25079 | "#define __msa_mini_u_d __builtin_msa_mini_u_d\n" |
| 25080 | "#define __msa_max_a_b __builtin_msa_max_a_b\n" |
| 25081 | "#define __msa_max_a_h __builtin_msa_max_a_h\n" |
| 25082 | "#define __msa_max_a_w __builtin_msa_max_a_w\n" |
| 25083 | "#define __msa_max_a_d __builtin_msa_max_a_d\n" |
| 25084 | "#define __msa_min_a_b __builtin_msa_min_a_b\n" |
| 25085 | "#define __msa_min_a_h __builtin_msa_min_a_h\n" |
| 25086 | "#define __msa_min_a_w __builtin_msa_min_a_w\n" |
| 25087 | "#define __msa_min_a_d __builtin_msa_min_a_d\n" |
| 25088 | "#define __msa_ceq_b __builtin_msa_ceq_b\n" |
| 25089 | "#define __msa_ceq_h __builtin_msa_ceq_h\n" |
| 25090 | "#define __msa_ceq_w __builtin_msa_ceq_w\n" |
| 25091 | "#define __msa_ceq_d __builtin_msa_ceq_d\n" |
| 25092 | "#define __msa_ceqi_b __builtin_msa_ceqi_b\n" |
| 25093 | "#define __msa_ceqi_h __builtin_msa_ceqi_h\n" |
| 25094 | "#define __msa_ceqi_w __builtin_msa_ceqi_w\n" |
| 25095 | "#define __msa_ceqi_d __builtin_msa_ceqi_d\n" |
| 25096 | "#define __msa_clt_s_b __builtin_msa_clt_s_b\n" |
| 25097 | "#define __msa_clt_s_h __builtin_msa_clt_s_h\n" |
| 25098 | "#define __msa_clt_s_w __builtin_msa_clt_s_w\n" |
| 25099 | "#define __msa_clt_s_d __builtin_msa_clt_s_d\n" |
| 25100 | "#define __msa_clti_s_b __builtin_msa_clti_s_b\n" |
| 25101 | "#define __msa_clti_s_h __builtin_msa_clti_s_h\n" |
| 25102 | "#define __msa_clti_s_w __builtin_msa_clti_s_w\n" |
| 25103 | "#define __msa_clti_s_d __builtin_msa_clti_s_d\n" |
| 25104 | "#define __msa_clt_u_b __builtin_msa_clt_u_b\n" |
| 25105 | "#define __msa_clt_u_h __builtin_msa_clt_u_h\n" |
| 25106 | "#define __msa_clt_u_w __builtin_msa_clt_u_w\n" |
| 25107 | "#define __msa_clt_u_d __builtin_msa_clt_u_d\n" |
| 25108 | "#define __msa_clti_u_b __builtin_msa_clti_u_b\n" |
| 25109 | "#define __msa_clti_u_h __builtin_msa_clti_u_h\n" |
| 25110 | "#define __msa_clti_u_w __builtin_msa_clti_u_w\n" |
| 25111 | "#define __msa_clti_u_d __builtin_msa_clti_u_d\n" |
| 25112 | "#define __msa_cle_s_b __builtin_msa_cle_s_b\n" |
| 25113 | "#define __msa_cle_s_h __builtin_msa_cle_s_h\n" |
| 25114 | "#define __msa_cle_s_w __builtin_msa_cle_s_w\n" |
| 25115 | "#define __msa_cle_s_d __builtin_msa_cle_s_d\n" |
| 25116 | "#define __msa_clei_s_b __builtin_msa_clei_s_b\n" |
| 25117 | "#define __msa_clei_s_h __builtin_msa_clei_s_h\n" |
| 25118 | "#define __msa_clei_s_w __builtin_msa_clei_s_w\n" |
| 25119 | "#define __msa_clei_s_d __builtin_msa_clei_s_d\n" |
| 25120 | "#define __msa_cle_u_b __builtin_msa_cle_u_b\n" |
| 25121 | "#define __msa_cle_u_h __builtin_msa_cle_u_h\n" |
| 25122 | "#define __msa_cle_u_w __builtin_msa_cle_u_w\n" |
| 25123 | "#define __msa_cle_u_d __builtin_msa_cle_u_d\n" |
| 25124 | "#define __msa_clei_u_b __builtin_msa_clei_u_b\n" |
| 25125 | "#define __msa_clei_u_h __builtin_msa_clei_u_h\n" |
| 25126 | "#define __msa_clei_u_w __builtin_msa_clei_u_w\n" |
| 25127 | "#define __msa_clei_u_d __builtin_msa_clei_u_d\n" |
| 25128 | "#define __msa_ld_b __builtin_msa_ld_b\n" |
| 25129 | "#define __msa_ld_h __builtin_msa_ld_h\n" |
| 25130 | "#define __msa_ld_w __builtin_msa_ld_w\n" |
| 25131 | "#define __msa_ld_d __builtin_msa_ld_d\n" |
| 25132 | "#define __msa_st_b __builtin_msa_st_b\n" |
| 25133 | "#define __msa_st_h __builtin_msa_st_h\n" |
| 25134 | "#define __msa_st_w __builtin_msa_st_w\n" |
| 25135 | "#define __msa_st_d __builtin_msa_st_d\n" |
| 25136 | "#define __msa_sat_s_b __builtin_msa_sat_s_b\n" |
| 25137 | "#define __msa_sat_s_h __builtin_msa_sat_s_h\n" |
| 25138 | "#define __msa_sat_s_w __builtin_msa_sat_s_w\n" |
| 25139 | "#define __msa_sat_s_d __builtin_msa_sat_s_d\n" |
| 25140 | "#define __msa_sat_u_b __builtin_msa_sat_u_b\n" |
| 25141 | "#define __msa_sat_u_h __builtin_msa_sat_u_h\n" |
| 25142 | "#define __msa_sat_u_w __builtin_msa_sat_u_w\n" |
| 25143 | "#define __msa_sat_u_d __builtin_msa_sat_u_d\n" |
| 25144 | "#define __msa_add_a_b __builtin_msa_add_a_b\n" |
| 25145 | "#define __msa_add_a_h __builtin_msa_add_a_h\n" |
| 25146 | "#define __msa_add_a_w __builtin_msa_add_a_w\n" |
| 25147 | "#define __msa_add_a_d __builtin_msa_add_a_d\n" |
| 25148 | "#define __msa_adds_a_b __builtin_msa_adds_a_b\n" |
| 25149 | "#define __msa_adds_a_h __builtin_msa_adds_a_h\n" |
| 25150 | "#define __msa_adds_a_w __builtin_msa_adds_a_w\n" |
| 25151 | "#define __msa_adds_a_d __builtin_msa_adds_a_d\n" |
| 25152 | "#define __msa_adds_s_b __builtin_msa_adds_s_b\n" |
| 25153 | "#define __msa_adds_s_h __builtin_msa_adds_s_h\n" |
| 25154 | "#define __msa_adds_s_w __builtin_msa_adds_s_w\n" |
| 25155 | "#define __msa_adds_s_d __builtin_msa_adds_s_d\n" |
| 25156 | "#define __msa_adds_u_b __builtin_msa_adds_u_b\n" |
| 25157 | "#define __msa_adds_u_h __builtin_msa_adds_u_h\n" |
| 25158 | "#define __msa_adds_u_w __builtin_msa_adds_u_w\n" |
| 25159 | "#define __msa_adds_u_d __builtin_msa_adds_u_d\n" |
| 25160 | "#define __msa_ave_s_b __builtin_msa_ave_s_b\n" |
| 25161 | "#define __msa_ave_s_h __builtin_msa_ave_s_h\n" |
| 25162 | "#define __msa_ave_s_w __builtin_msa_ave_s_w\n" |
| 25163 | "#define __msa_ave_s_d __builtin_msa_ave_s_d\n" |
| 25164 | "#define __msa_ave_u_b __builtin_msa_ave_u_b\n" |
| 25165 | "#define __msa_ave_u_h __builtin_msa_ave_u_h\n" |
| 25166 | "#define __msa_ave_u_w __builtin_msa_ave_u_w\n" |
| 25167 | "#define __msa_ave_u_d __builtin_msa_ave_u_d\n" |
| 25168 | "#define __msa_aver_s_b __builtin_msa_aver_s_b\n" |
| 25169 | "#define __msa_aver_s_h __builtin_msa_aver_s_h\n" |
| 25170 | "#define __msa_aver_s_w __builtin_msa_aver_s_w\n" |
| 25171 | "#define __msa_aver_s_d __builtin_msa_aver_s_d\n" |
| 25172 | "#define __msa_aver_u_b __builtin_msa_aver_u_b\n" |
| 25173 | "#define __msa_aver_u_h __builtin_msa_aver_u_h\n" |
| 25174 | "#define __msa_aver_u_w __builtin_msa_aver_u_w\n" |
| 25175 | "#define __msa_aver_u_d __builtin_msa_aver_u_d\n" |
| 25176 | "#define __msa_subs_s_b __builtin_msa_subs_s_b\n" |
| 25177 | "#define __msa_subs_s_h __builtin_msa_subs_s_h\n" |
| 25178 | "#define __msa_subs_s_w __builtin_msa_subs_s_w\n" |
| 25179 | "#define __msa_subs_s_d __builtin_msa_subs_s_d\n" |
| 25180 | "#define __msa_subs_u_b __builtin_msa_subs_u_b\n" |
| 25181 | "#define __msa_subs_u_h __builtin_msa_subs_u_h\n" |
| 25182 | "#define __msa_subs_u_w __builtin_msa_subs_u_w\n" |
| 25183 | "#define __msa_subs_u_d __builtin_msa_subs_u_d\n" |
| 25184 | "#define __msa_subsuu_s_b __builtin_msa_subsuu_s_b\n" |
| 25185 | "#define __msa_subsuu_s_h __builtin_msa_subsuu_s_h\n" |
| 25186 | "#define __msa_subsuu_s_w __builtin_msa_subsuu_s_w\n" |
| 25187 | "#define __msa_subsuu_s_d __builtin_msa_subsuu_s_d\n" |
| 25188 | "#define __msa_subsus_u_b __builtin_msa_subsus_u_b\n" |
| 25189 | "#define __msa_subsus_u_h __builtin_msa_subsus_u_h\n" |
| 25190 | "#define __msa_subsus_u_w __builtin_msa_subsus_u_w\n" |
| 25191 | "#define __msa_subsus_u_d __builtin_msa_subsus_u_d\n" |
| 25192 | "#define __msa_asub_s_b __builtin_msa_asub_s_b\n" |
| 25193 | "#define __msa_asub_s_h __builtin_msa_asub_s_h\n" |
| 25194 | "#define __msa_asub_s_w __builtin_msa_asub_s_w\n" |
| 25195 | "#define __msa_asub_s_d __builtin_msa_asub_s_d\n" |
| 25196 | "#define __msa_asub_u_b __builtin_msa_asub_u_b\n" |
| 25197 | "#define __msa_asub_u_h __builtin_msa_asub_u_h\n" |
| 25198 | "#define __msa_asub_u_w __builtin_msa_asub_u_w\n" |
| 25199 | "#define __msa_asub_u_d __builtin_msa_asub_u_d\n" |
| 25200 | "#define __msa_mulv_b __builtin_msa_mulv_b\n" |
| 25201 | "#define __msa_mulv_h __builtin_msa_mulv_h\n" |
| 25202 | "#define __msa_mulv_w __builtin_msa_mulv_w\n" |
| 25203 | "#define __msa_mulv_d __builtin_msa_mulv_d\n" |
| 25204 | "#define __msa_maddv_b __builtin_msa_maddv_b\n" |
| 25205 | "#define __msa_maddv_h __builtin_msa_maddv_h\n" |
| 25206 | "#define __msa_maddv_w __builtin_msa_maddv_w\n" |
| 25207 | "#define __msa_maddv_d __builtin_msa_maddv_d\n" |
| 25208 | "#define __msa_msubv_b __builtin_msa_msubv_b\n" |
| 25209 | "#define __msa_msubv_h __builtin_msa_msubv_h\n" |
| 25210 | "#define __msa_msubv_w __builtin_msa_msubv_w\n" |
| 25211 | "#define __msa_msubv_d __builtin_msa_msubv_d\n" |
| 25212 | "#define __msa_div_s_b __builtin_msa_div_s_b\n" |
| 25213 | "#define __msa_div_s_h __builtin_msa_div_s_h\n" |
| 25214 | "#define __msa_div_s_w __builtin_msa_div_s_w\n" |
| 25215 | "#define __msa_div_s_d __builtin_msa_div_s_d\n" |
| 25216 | "#define __msa_div_u_b __builtin_msa_div_u_b\n" |
| 25217 | "#define __msa_div_u_h __builtin_msa_div_u_h\n" |
| 25218 | "#define __msa_div_u_w __builtin_msa_div_u_w\n" |
| 25219 | "#define __msa_div_u_d __builtin_msa_div_u_d\n" |
| 25220 | "#define __msa_hadd_s_h __builtin_msa_hadd_s_h\n" |
| 25221 | "#define __msa_hadd_s_w __builtin_msa_hadd_s_w\n" |
| 25222 | "#define __msa_hadd_s_d __builtin_msa_hadd_s_d\n" |
| 25223 | "#define __msa_hadd_u_h __builtin_msa_hadd_u_h\n" |
| 25224 | "#define __msa_hadd_u_w __builtin_msa_hadd_u_w\n" |
| 25225 | "#define __msa_hadd_u_d __builtin_msa_hadd_u_d\n" |
| 25226 | "#define __msa_hsub_s_h __builtin_msa_hsub_s_h\n" |
| 25227 | "#define __msa_hsub_s_w __builtin_msa_hsub_s_w\n" |
| 25228 | "#define __msa_hsub_s_d __builtin_msa_hsub_s_d\n" |
| 25229 | "#define __msa_hsub_u_h __builtin_msa_hsub_u_h\n" |
| 25230 | "#define __msa_hsub_u_w __builtin_msa_hsub_u_w\n" |
| 25231 | "#define __msa_hsub_u_d __builtin_msa_hsub_u_d\n" |
| 25232 | "#define __msa_mod_s_b __builtin_msa_mod_s_b\n" |
| 25233 | "#define __msa_mod_s_h __builtin_msa_mod_s_h\n" |
| 25234 | "#define __msa_mod_s_w __builtin_msa_mod_s_w\n" |
| 25235 | "#define __msa_mod_s_d __builtin_msa_mod_s_d\n" |
| 25236 | "#define __msa_mod_u_b __builtin_msa_mod_u_b\n" |
| 25237 | "#define __msa_mod_u_h __builtin_msa_mod_u_h\n" |
| 25238 | "#define __msa_mod_u_w __builtin_msa_mod_u_w\n" |
| 25239 | "#define __msa_mod_u_d __builtin_msa_mod_u_d\n" |
| 25240 | "#define __msa_dotp_s_h __builtin_msa_dotp_s_h\n" |
| 25241 | "#define __msa_dotp_s_w __builtin_msa_dotp_s_w\n" |
| 25242 | "#define __msa_dotp_s_d __builtin_msa_dotp_s_d\n" |
| 25243 | "#define __msa_dotp_u_h __builtin_msa_dotp_u_h\n" |
| 25244 | "#define __msa_dotp_u_w __builtin_msa_dotp_u_w\n" |
| 25245 | "#define __msa_dotp_u_d __builtin_msa_dotp_u_d\n" |
| 25246 | "#define __msa_dpadd_s_h __builtin_msa_dpadd_s_h\n" |
| 25247 | "#define __msa_dpadd_s_w __builtin_msa_dpadd_s_w\n" |
| 25248 | "#define __msa_dpadd_s_d __builtin_msa_dpadd_s_d\n" |
| 25249 | "#define __msa_dpadd_u_h __builtin_msa_dpadd_u_h\n" |
| 25250 | "#define __msa_dpadd_u_w __builtin_msa_dpadd_u_w\n" |
| 25251 | "#define __msa_dpadd_u_d __builtin_msa_dpadd_u_d\n" |
| 25252 | "#define __msa_dpsub_s_h __builtin_msa_dpsub_s_h\n" |
| 25253 | "#define __msa_dpsub_s_w __builtin_msa_dpsub_s_w\n" |
| 25254 | "#define __msa_dpsub_s_d __builtin_msa_dpsub_s_d\n" |
| 25255 | "#define __msa_dpsub_u_h __builtin_msa_dpsub_u_h\n" |
| 25256 | "#define __msa_dpsub_u_w __builtin_msa_dpsub_u_w\n" |
| 25257 | "#define __msa_dpsub_u_d __builtin_msa_dpsub_u_d\n" |
| 25258 | "#define __msa_sld_b __builtin_msa_sld_b\n" |
| 25259 | "#define __msa_sld_h __builtin_msa_sld_h\n" |
| 25260 | "#define __msa_sld_w __builtin_msa_sld_w\n" |
| 25261 | "#define __msa_sld_d __builtin_msa_sld_d\n" |
| 25262 | "#define __msa_sldi_b __builtin_msa_sldi_b\n" |
| 25263 | "#define __msa_sldi_h __builtin_msa_sldi_h\n" |
| 25264 | "#define __msa_sldi_w __builtin_msa_sldi_w\n" |
| 25265 | "#define __msa_sldi_d __builtin_msa_sldi_d\n" |
| 25266 | "#define __msa_splat_b __builtin_msa_splat_b\n" |
| 25267 | "#define __msa_splat_h __builtin_msa_splat_h\n" |
| 25268 | "#define __msa_splat_w __builtin_msa_splat_w\n" |
| 25269 | "#define __msa_splat_d __builtin_msa_splat_d\n" |
| 25270 | "#define __msa_splati_b __builtin_msa_splati_b\n" |
| 25271 | "#define __msa_splati_h __builtin_msa_splati_h\n" |
| 25272 | "#define __msa_splati_w __builtin_msa_splati_w\n" |
| 25273 | "#define __msa_splati_d __builtin_msa_splati_d\n" |
| 25274 | "#define __msa_pckev_b __builtin_msa_pckev_b\n" |
| 25275 | "#define __msa_pckev_h __builtin_msa_pckev_h\n" |
| 25276 | "#define __msa_pckev_w __builtin_msa_pckev_w\n" |
| 25277 | "#define __msa_pckev_d __builtin_msa_pckev_d\n" |
| 25278 | "#define __msa_pckod_b __builtin_msa_pckod_b\n" |
| 25279 | "#define __msa_pckod_h __builtin_msa_pckod_h\n" |
| 25280 | "#define __msa_pckod_w __builtin_msa_pckod_w\n" |
| 25281 | "#define __msa_pckod_d __builtin_msa_pckod_d\n" |
| 25282 | "#define __msa_ilvl_b __builtin_msa_ilvl_b\n" |
| 25283 | "#define __msa_ilvl_h __builtin_msa_ilvl_h\n" |
| 25284 | "#define __msa_ilvl_w __builtin_msa_ilvl_w\n" |
| 25285 | "#define __msa_ilvl_d __builtin_msa_ilvl_d\n" |
| 25286 | "#define __msa_ilvr_b __builtin_msa_ilvr_b\n" |
| 25287 | "#define __msa_ilvr_h __builtin_msa_ilvr_h\n" |
| 25288 | "#define __msa_ilvr_w __builtin_msa_ilvr_w\n" |
| 25289 | "#define __msa_ilvr_d __builtin_msa_ilvr_d\n" |
| 25290 | "#define __msa_ilvev_b __builtin_msa_ilvev_b\n" |
| 25291 | "#define __msa_ilvev_h __builtin_msa_ilvev_h\n" |
| 25292 | "#define __msa_ilvev_w __builtin_msa_ilvev_w\n" |
| 25293 | "#define __msa_ilvev_d __builtin_msa_ilvev_d\n" |
| 25294 | "#define __msa_ilvod_b __builtin_msa_ilvod_b\n" |
| 25295 | "#define __msa_ilvod_h __builtin_msa_ilvod_h\n" |
| 25296 | "#define __msa_ilvod_w __builtin_msa_ilvod_w\n" |
| 25297 | "#define __msa_ilvod_d __builtin_msa_ilvod_d\n" |
| 25298 | "#define __msa_vshf_b __builtin_msa_vshf_b\n" |
| 25299 | "#define __msa_vshf_h __builtin_msa_vshf_h\n" |
| 25300 | "#define __msa_vshf_w __builtin_msa_vshf_w\n" |
| 25301 | "#define __msa_vshf_d __builtin_msa_vshf_d\n" |
| 25302 | "#define __msa_and_v __builtin_msa_and_v\n" |
| 25303 | "#define __msa_andi_b __builtin_msa_andi_b\n" |
| 25304 | "#define __msa_or_v __builtin_msa_or_v\n" |
| 25305 | "#define __msa_ori_b __builtin_msa_ori_b\n" |
| 25306 | "#define __msa_nor_v __builtin_msa_nor_v\n" |
| 25307 | "#define __msa_nori_b __builtin_msa_nori_b\n" |
| 25308 | "#define __msa_xor_v __builtin_msa_xor_v\n" |
| 25309 | "#define __msa_xori_b __builtin_msa_xori_b\n" |
| 25310 | "#define __msa_bmnz_v __builtin_msa_bmnz_v\n" |
| 25311 | "#define __msa_bmnzi_b __builtin_msa_bmnzi_b\n" |
| 25312 | "#define __msa_bmz_v __builtin_msa_bmz_v\n" |
| 25313 | "#define __msa_bmzi_b __builtin_msa_bmzi_b\n" |
| 25314 | "#define __msa_bsel_v __builtin_msa_bsel_v\n" |
| 25315 | "#define __msa_bseli_b __builtin_msa_bseli_b\n" |
| 25316 | "#define __msa_shf_b __builtin_msa_shf_b\n" |
| 25317 | "#define __msa_shf_h __builtin_msa_shf_h\n" |
| 25318 | "#define __msa_shf_w __builtin_msa_shf_w\n" |
| 25319 | "#define __msa_test_bnz_v __builtin_msa_bnz_v\n" |
| 25320 | "#define __msa_test_bz_v __builtin_msa_bz_v\n" |
| 25321 | "#define __msa_fill_b __builtin_msa_fill_b\n" |
| 25322 | "#define __msa_fill_h __builtin_msa_fill_h\n" |
| 25323 | "#define __msa_fill_w __builtin_msa_fill_w\n" |
| 25324 | "#define __msa_fill_d __builtin_msa_fill_d\n" |
| 25325 | "#define __msa_pcnt_b __builtin_msa_pcnt_b\n" |
| 25326 | "#define __msa_pcnt_h __builtin_msa_pcnt_h\n" |
| 25327 | "#define __msa_pcnt_w __builtin_msa_pcnt_w\n" |
| 25328 | "#define __msa_pcnt_d __builtin_msa_pcnt_d\n" |
| 25329 | "#define __msa_nloc_b __builtin_msa_nloc_b\n" |
| 25330 | "#define __msa_nloc_h __builtin_msa_nloc_h\n" |
| 25331 | "#define __msa_nloc_w __builtin_msa_nloc_w\n" |
| 25332 | "#define __msa_nloc_d __builtin_msa_nloc_d\n" |
| 25333 | "#define __msa_nlzc_b __builtin_msa_nlzc_b\n" |
| 25334 | "#define __msa_nlzc_h __builtin_msa_nlzc_h\n" |
| 25335 | "#define __msa_nlzc_w __builtin_msa_nlzc_w\n" |
| 25336 | "#define __msa_nlzc_d __builtin_msa_nlzc_d\n" |
| 25337 | "#define __msa_copy_s_b __builtin_msa_copy_s_b\n" |
| 25338 | "#define __msa_copy_s_h __builtin_msa_copy_s_h\n" |
| 25339 | "#define __msa_copy_s_w __builtin_msa_copy_s_w\n" |
| 25340 | "#define __msa_copy_s_d __builtin_msa_copy_s_d\n" |
| 25341 | "#define __msa_copy_u_b __builtin_msa_copy_u_b\n" |
| 25342 | "#define __msa_copy_u_h __builtin_msa_copy_u_h\n" |
| 25343 | "#define __msa_copy_u_w __builtin_msa_copy_u_w\n" |
| 25344 | "#define __msa_copy_u_d __builtin_msa_copy_u_d\n" |
| 25345 | "#define __msa_insert_b __builtin_msa_insert_b\n" |
| 25346 | "#define __msa_insert_h __builtin_msa_insert_h\n" |
| 25347 | "#define __msa_insert_w __builtin_msa_insert_w\n" |
| 25348 | "#define __msa_insert_d __builtin_msa_insert_d\n" |
| 25349 | "#define __msa_insve_b __builtin_msa_insve_b\n" |
| 25350 | "#define __msa_insve_h __builtin_msa_insve_h\n" |
| 25351 | "#define __msa_insve_w __builtin_msa_insve_w\n" |
| 25352 | "#define __msa_insve_d __builtin_msa_insve_d\n" |
| 25353 | "#define __msa_test_bnz_b __builtin_msa_bnz_b\n" |
| 25354 | "#define __msa_test_bnz_h __builtin_msa_bnz_h\n" |
| 25355 | "#define __msa_test_bnz_w __builtin_msa_bnz_w\n" |
| 25356 | "#define __msa_test_bnz_d __builtin_msa_bnz_d\n" |
| 25357 | "#define __msa_test_bz_b __builtin_msa_bz_b\n" |
| 25358 | "#define __msa_test_bz_h __builtin_msa_bz_h\n" |
| 25359 | "#define __msa_test_bz_w __builtin_msa_bz_w\n" |
| 25360 | "#define __msa_test_bz_d __builtin_msa_bz_d\n" |
| 25361 | "#define __msa_ldi_b __builtin_msa_ldi_b\n" |
| 25362 | "#define __msa_ldi_h __builtin_msa_ldi_h\n" |
| 25363 | "#define __msa_ldi_w __builtin_msa_ldi_w\n" |
| 25364 | "#define __msa_ldi_d __builtin_msa_ldi_d\n" |
| 25365 | "#define __msa_fcaf_w __builtin_msa_fcaf_w\n" |
| 25366 | "#define __msa_fcaf_d __builtin_msa_fcaf_d\n" |
| 25367 | "#define __msa_fcor_w __builtin_msa_fcor_w\n" |
| 25368 | "#define __msa_fcor_d __builtin_msa_fcor_d\n" |
| 25369 | "#define __msa_fcun_w __builtin_msa_fcun_w\n" |
| 25370 | "#define __msa_fcun_d __builtin_msa_fcun_d\n" |
| 25371 | "#define __msa_fcune_w __builtin_msa_fcune_w\n" |
| 25372 | "#define __msa_fcune_d __builtin_msa_fcune_d\n" |
| 25373 | "#define __msa_fcueq_w __builtin_msa_fcueq_w\n" |
| 25374 | "#define __msa_fcueq_d __builtin_msa_fcueq_d\n" |
| 25375 | "#define __msa_fceq_w __builtin_msa_fceq_w\n" |
| 25376 | "#define __msa_fceq_d __builtin_msa_fceq_d\n" |
| 25377 | "#define __msa_fcne_w __builtin_msa_fcne_w\n" |
| 25378 | "#define __msa_fcne_d __builtin_msa_fcne_d\n" |
| 25379 | "#define __msa_fclt_w __builtin_msa_fclt_w\n" |
| 25380 | "#define __msa_fclt_d __builtin_msa_fclt_d\n" |
| 25381 | "#define __msa_fcult_w __builtin_msa_fcult_w\n" |
| 25382 | "#define __msa_fcult_d __builtin_msa_fcult_d\n" |
| 25383 | "#define __msa_fcle_w __builtin_msa_fcle_w\n" |
| 25384 | "#define __msa_fcle_d __builtin_msa_fcle_d\n" |
| 25385 | "#define __msa_fcule_w __builtin_msa_fcule_w\n" |
| 25386 | "#define __msa_fcule_d __builtin_msa_fcule_d\n" |
| 25387 | "#define __msa_fsaf_w __builtin_msa_fsaf_w\n" |
| 25388 | "#define __msa_fsaf_d __builtin_msa_fsaf_d\n" |
| 25389 | "#define __msa_fsor_w __builtin_msa_fsor_w\n" |
| 25390 | "#define __msa_fsor_d __builtin_msa_fsor_d\n" |
| 25391 | "#define __msa_fsun_w __builtin_msa_fsun_w\n" |
| 25392 | "#define __msa_fsun_d __builtin_msa_fsun_d\n" |
| 25393 | "#define __msa_fsune_w __builtin_msa_fsune_w\n" |
| 25394 | "#define __msa_fsune_d __builtin_msa_fsune_d\n" |
| 25395 | "#define __msa_fsueq_w __builtin_msa_fsueq_w\n" |
| 25396 | "#define __msa_fsueq_d __builtin_msa_fsueq_d\n" |
| 25397 | "#define __msa_fseq_w __builtin_msa_fseq_w\n" |
| 25398 | "#define __msa_fseq_d __builtin_msa_fseq_d\n" |
| 25399 | "#define __msa_fsne_w __builtin_msa_fsne_w\n" |
| 25400 | "#define __msa_fsne_d __builtin_msa_fsne_d\n" |
| 25401 | "#define __msa_fslt_w __builtin_msa_fslt_w\n" |
| 25402 | "#define __msa_fslt_d __builtin_msa_fslt_d\n" |
| 25403 | "#define __msa_fsult_w __builtin_msa_fsult_w\n" |
| 25404 | "#define __msa_fsult_d __builtin_msa_fsult_d\n" |
| 25405 | "#define __msa_fsle_w __builtin_msa_fsle_w\n" |
| 25406 | "#define __msa_fsle_d __builtin_msa_fsle_d\n" |
| 25407 | "#define __msa_fsule_w __builtin_msa_fsule_w\n" |
| 25408 | "#define __msa_fsule_d __builtin_msa_fsule_d\n" |
| 25409 | "#define __msa_fadd_w __builtin_msa_fadd_w\n" |
| 25410 | "#define __msa_fadd_d __builtin_msa_fadd_d\n" |
| 25411 | "#define __msa_fsub_w __builtin_msa_fsub_w\n" |
| 25412 | "#define __msa_fsub_d __builtin_msa_fsub_d\n" |
| 25413 | "#define __msa_fmul_w __builtin_msa_fmul_w\n" |
| 25414 | "#define __msa_fmul_d __builtin_msa_fmul_d\n" |
| 25415 | "#define __msa_fdiv_w __builtin_msa_fdiv_w\n" |
| 25416 | "#define __msa_fdiv_d __builtin_msa_fdiv_d\n" |
| 25417 | "#define __msa_fmadd_w __builtin_msa_fmadd_w\n" |
| 25418 | "#define __msa_fmadd_d __builtin_msa_fmadd_d\n" |
| 25419 | "#define __msa_fmsub_w __builtin_msa_fmsub_w\n" |
| 25420 | "#define __msa_fmsub_d __builtin_msa_fmsub_d\n" |
| 25421 | "#define __msa_fexp2_w __builtin_msa_fexp2_w\n" |
| 25422 | "#define __msa_fexp2_d __builtin_msa_fexp2_d\n" |
| 25423 | "#define __msa_fexdo_h __builtin_msa_fexdo_h\n" |
| 25424 | "#define __msa_fexdo_w __builtin_msa_fexdo_w\n" |
| 25425 | "#define __msa_ftq_h __builtin_msa_ftq_h\n" |
| 25426 | "#define __msa_ftq_w __builtin_msa_ftq_w\n" |
| 25427 | "#define __msa_fmin_w __builtin_msa_fmin_w\n" |
| 25428 | "#define __msa_fmin_d __builtin_msa_fmin_d\n" |
| 25429 | "#define __msa_fmin_a_w __builtin_msa_fmin_a_w\n" |
| 25430 | "#define __msa_fmin_a_d __builtin_msa_fmin_a_d\n" |
| 25431 | "#define __msa_fmax_w __builtin_msa_fmax_w\n" |
| 25432 | "#define __msa_fmax_d __builtin_msa_fmax_d\n" |
| 25433 | "#define __msa_fmax_a_w __builtin_msa_fmax_a_w\n" |
| 25434 | "#define __msa_fmax_a_d __builtin_msa_fmax_a_d\n" |
| 25435 | "#define __msa_mul_q_h __builtin_msa_mul_q_h\n" |
| 25436 | "#define __msa_mul_q_w __builtin_msa_mul_q_w\n" |
| 25437 | "#define __msa_mulr_q_h __builtin_msa_mulr_q_h\n" |
| 25438 | "#define __msa_mulr_q_w __builtin_msa_mulr_q_w\n" |
| 25439 | "#define __msa_madd_q_h __builtin_msa_madd_q_h\n" |
| 25440 | "#define __msa_madd_q_w __builtin_msa_madd_q_w\n" |
| 25441 | "#define __msa_maddr_q_h __builtin_msa_maddr_q_h\n" |
| 25442 | "#define __msa_maddr_q_w __builtin_msa_maddr_q_w\n" |
| 25443 | "#define __msa_msub_q_h __builtin_msa_msub_q_h\n" |
| 25444 | "#define __msa_msub_q_w __builtin_msa_msub_q_w\n" |
| 25445 | "#define __msa_msubr_q_h __builtin_msa_msubr_q_h\n" |
| 25446 | "#define __msa_msubr_q_w __builtin_msa_msubr_q_w\n" |
| 25447 | "#define __msa_fclass_w __builtin_msa_fclass_w\n" |
| 25448 | "#define __msa_fclass_d __builtin_msa_fclass_d\n" |
| 25449 | "#define __msa_fsqrt_w __builtin_msa_fsqrt_w\n" |
| 25450 | "#define __msa_fsqrt_d __builtin_msa_fsqrt_d\n" |
| 25451 | "#define __msa_frcp_w __builtin_msa_frcp_w\n" |
| 25452 | "#define __msa_frcp_d __builtin_msa_frcp_d\n" |
| 25453 | "#define __msa_frint_w __builtin_msa_frint_w\n" |
| 25454 | "#define __msa_frint_d __builtin_msa_frint_d\n" |
| 25455 | "#define __msa_frsqrt_w __builtin_msa_frsqrt_w\n" |
| 25456 | "#define __msa_frsqrt_d __builtin_msa_frsqrt_d\n" |
| 25457 | "#define __msa_flog2_w __builtin_msa_flog2_w\n" |
| 25458 | "#define __msa_flog2_d __builtin_msa_flog2_d\n" |
| 25459 | "#define __msa_fexupl_w __builtin_msa_fexupl_w\n" |
| 25460 | "#define __msa_fexupl_d __builtin_msa_fexupl_d\n" |
| 25461 | "#define __msa_fexupr_w __builtin_msa_fexupr_w\n" |
| 25462 | "#define __msa_fexupr_d __builtin_msa_fexupr_d\n" |
| 25463 | "#define __msa_ffql_w __builtin_msa_ffql_w\n" |
| 25464 | "#define __msa_ffql_d __builtin_msa_ffql_d\n" |
| 25465 | "#define __msa_ffqr_w __builtin_msa_ffqr_w\n" |
| 25466 | "#define __msa_ffqr_d __builtin_msa_ffqr_d\n" |
| 25467 | "#define __msa_ftint_s_w __builtin_msa_ftint_s_w\n" |
| 25468 | "#define __msa_ftint_s_d __builtin_msa_ftint_s_d\n" |
| 25469 | "#define __msa_ftint_u_w __builtin_msa_ftint_u_w\n" |
| 25470 | "#define __msa_ftint_u_d __builtin_msa_ftint_u_d\n" |
| 25471 | "#define __msa_ftrunc_s_w __builtin_msa_ftrunc_s_w\n" |
| 25472 | "#define __msa_ftrunc_s_d __builtin_msa_ftrunc_s_d\n" |
| 25473 | "#define __msa_ftrunc_u_w __builtin_msa_ftrunc_u_w\n" |
| 25474 | "#define __msa_ftrunc_u_d __builtin_msa_ftrunc_u_d\n" |
| 25475 | "#define __msa_ffint_s_w __builtin_msa_ffint_s_w\n" |
| 25476 | "#define __msa_ffint_s_d __builtin_msa_ffint_s_d\n" |
| 25477 | "#define __msa_ffint_u_w __builtin_msa_ffint_u_w\n" |
| 25478 | "#define __msa_ffint_u_d __builtin_msa_ffint_u_d\n" |
| 25479 | "#define __msa_cfcmsa __builtin_msa_cfcmsa\n" |
| 25480 | "#define __msa_move_v __builtin_msa_move_v\n" |
| 25481 | "#define __msa_cast_to_vector_float __builtin_msa_cast_to_vector_float\n" |
| 25482 | "#define __msa_cast_to_vector_double __builtin_msa_cast_to_vector_double\n" |
| 25483 | "#define __msa_cast_to_scalar_float __builtin_msa_cast_to_scalar_float\n" |
| 25484 | "#define __msa_cast_to_scalar_double __builtin_msa_cast_to_scalar_double\n" |
| 25485 | "#endif /* defined(__mips_msa) */\n" |
| 25486 | "#endif /* _MSA_H */\n" |
| 25487 | "" } , |
| 25488 | { "/builtins/mwaitxintrin.h" , "/*===---- mwaitxintrin.h - MONITORX/MWAITX intrinsics ----------------------===\n" |
| 25489 | " *\n" |
| 25490 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 25491 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 25492 | " * in the Software without restriction, including without limitation the rights\n" |
| 25493 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 25494 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 25495 | " * furnished to do so, subject to the following conditions:\n" |
| 25496 | " *\n" |
| 25497 | " * The above copyright notice and this permission notice shall be included in\n" |
| 25498 | " * all copies or substantial portions of the Software.\n" |
| 25499 | " *\n" |
| 25500 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 25501 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 25502 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 25503 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 25504 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 25505 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 25506 | " * THE SOFTWARE.\n" |
| 25507 | " *\n" |
| 25508 | " *===-----------------------------------------------------------------------===\n" |
| 25509 | " */\n" |
| 25510 | "\n" |
| 25511 | "#ifndef __X86INTRIN_H\n" |
| 25512 | "#error \"Never use <mwaitxintrin.h> directly; include <x86intrin.h> instead.\"\n" |
| 25513 | "#endif\n" |
| 25514 | "\n" |
| 25515 | "#ifndef __MWAITXINTRIN_H\n" |
| 25516 | "#define __MWAITXINTRIN_H\n" |
| 25517 | "\n" |
| 25518 | "/* Define the default attributes for the functions in this file. */\n" |
| 25519 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"mwaitx\")))\n" |
| 25520 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 25521 | "_mm_monitorx(void const * __p, unsigned __extensions, unsigned __hints)\n" |
| 25522 | "{\n" |
| 25523 | " __builtin_ia32_monitorx((void *)__p, __extensions, __hints);\n" |
| 25524 | "}\n" |
| 25525 | "\n" |
| 25526 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 25527 | "_mm_mwaitx(unsigned __extensions, unsigned __hints, unsigned __clock)\n" |
| 25528 | "{\n" |
| 25529 | " __builtin_ia32_mwaitx(__extensions, __hints, __clock);\n" |
| 25530 | "}\n" |
| 25531 | "\n" |
| 25532 | "#undef __DEFAULT_FN_ATTRS\n" |
| 25533 | "\n" |
| 25534 | "#endif /* __MWAITXINTRIN_H */\n" |
| 25535 | "" } , |
| 25536 | { "/builtins/nmmintrin.h" , "/*===---- nmmintrin.h - SSE4 intrinsics ------------------------------------===\n" |
| 25537 | " *\n" |
| 25538 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 25539 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 25540 | " * in the Software without restriction, including without limitation the rights\n" |
| 25541 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 25542 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 25543 | " * furnished to do so, subject to the following conditions:\n" |
| 25544 | " *\n" |
| 25545 | " * The above copyright notice and this permission notice shall be included in\n" |
| 25546 | " * all copies or substantial portions of the Software.\n" |
| 25547 | " *\n" |
| 25548 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 25549 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 25550 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 25551 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 25552 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 25553 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 25554 | " * THE SOFTWARE.\n" |
| 25555 | " *\n" |
| 25556 | " *===-----------------------------------------------------------------------===\n" |
| 25557 | " */\n" |
| 25558 | "\n" |
| 25559 | "#ifndef __NMMINTRIN_H\n" |
| 25560 | "#define __NMMINTRIN_H\n" |
| 25561 | "\n" |
| 25562 | "/* To match expectations of gcc we put the sse4.2 definitions into smmintrin.h,\n" |
| 25563 | " just include it now then. */\n" |
| 25564 | "#include <smmintrin.h>\n" |
| 25565 | "#endif /* __NMMINTRIN_H */\n" |
| 25566 | "" } , |
| 25567 | { "/builtins/omp.h" , "/*\n" |
| 25568 | " * include/50/omp.h.var\n" |
| 25569 | " */\n" |
| 25570 | "\n" |
| 25571 | "\n" |
| 25572 | "//===----------------------------------------------------------------------===//\n" |
| 25573 | "//\n" |
| 25574 | "// The LLVM Compiler Infrastructure\n" |
| 25575 | "//\n" |
| 25576 | "// This file is dual licensed under the MIT and the University of Illinois Open\n" |
| 25577 | "// Source Licenses. See LICENSE.txt for details.\n" |
| 25578 | "//\n" |
| 25579 | "//===----------------------------------------------------------------------===//\n" |
| 25580 | "\n" |
| 25581 | "\n" |
| 25582 | "#ifndef __OMP_H\n" |
| 25583 | "# define __OMP_H\n" |
| 25584 | "\n" |
| 25585 | "# define KMP_VERSION_MAJOR 5\n" |
| 25586 | "# define KMP_VERSION_MINOR 0\n" |
| 25587 | "# define KMP_VERSION_BUILD 20140926\n" |
| 25588 | "# define KMP_BUILD_DATE \"No_Timestamp\"\n" |
| 25589 | "\n" |
| 25590 | "# ifdef __cplusplus\n" |
| 25591 | " extern \"C\" {\n" |
| 25592 | "# endif\n" |
| 25593 | "\n" |
| 25594 | "# if defined(_WIN32)\n" |
| 25595 | "# define __KAI_KMPC_CONVENTION __cdecl\n" |
| 25596 | "# else\n" |
| 25597 | "# define __KAI_KMPC_CONVENTION\n" |
| 25598 | "# endif\n" |
| 25599 | "\n" |
| 25600 | " /* schedule kind constants */\n" |
| 25601 | " typedef enum omp_sched_t {\n" |
| 25602 | " omp_sched_static = 1,\n" |
| 25603 | " omp_sched_dynamic = 2,\n" |
| 25604 | " omp_sched_guided = 3,\n" |
| 25605 | " omp_sched_auto = 4\n" |
| 25606 | " } omp_sched_t;\n" |
| 25607 | "\n" |
| 25608 | " /* set API functions */\n" |
| 25609 | " extern void __KAI_KMPC_CONVENTION omp_set_num_threads (int);\n" |
| 25610 | " extern void __KAI_KMPC_CONVENTION omp_set_dynamic (int);\n" |
| 25611 | " extern void __KAI_KMPC_CONVENTION omp_set_nested (int);\n" |
| 25612 | " extern void __KAI_KMPC_CONVENTION omp_set_max_active_levels (int);\n" |
| 25613 | " extern void __KAI_KMPC_CONVENTION omp_set_schedule (omp_sched_t, int);\n" |
| 25614 | "\n" |
| 25615 | " /* query API functions */\n" |
| 25616 | " extern int __KAI_KMPC_CONVENTION omp_get_num_threads (void);\n" |
| 25617 | " extern int __KAI_KMPC_CONVENTION omp_get_dynamic (void);\n" |
| 25618 | " extern int __KAI_KMPC_CONVENTION omp_get_nested (void);\n" |
| 25619 | " extern int __KAI_KMPC_CONVENTION omp_get_max_threads (void);\n" |
| 25620 | " extern int __KAI_KMPC_CONVENTION omp_get_thread_num (void);\n" |
| 25621 | " extern int __KAI_KMPC_CONVENTION omp_get_num_procs (void);\n" |
| 25622 | " extern int __KAI_KMPC_CONVENTION omp_in_parallel (void);\n" |
| 25623 | " extern int __KAI_KMPC_CONVENTION omp_in_final (void);\n" |
| 25624 | " extern int __KAI_KMPC_CONVENTION omp_get_active_level (void);\n" |
| 25625 | " extern int __KAI_KMPC_CONVENTION omp_get_level (void);\n" |
| 25626 | " extern int __KAI_KMPC_CONVENTION omp_get_ancestor_thread_num (int);\n" |
| 25627 | " extern int __KAI_KMPC_CONVENTION omp_get_team_size (int);\n" |
| 25628 | " extern int __KAI_KMPC_CONVENTION omp_get_thread_limit (void);\n" |
| 25629 | " extern int __KAI_KMPC_CONVENTION omp_get_max_active_levels (void);\n" |
| 25630 | " extern void __KAI_KMPC_CONVENTION omp_get_schedule (omp_sched_t *, int *);\n" |
| 25631 | " extern int __KAI_KMPC_CONVENTION omp_get_max_task_priority (void);\n" |
| 25632 | "\n" |
| 25633 | " /* lock API functions */\n" |
| 25634 | " typedef struct omp_lock_t {\n" |
| 25635 | " void * _lk;\n" |
| 25636 | " } omp_lock_t;\n" |
| 25637 | "\n" |
| 25638 | " extern void __KAI_KMPC_CONVENTION omp_init_lock (omp_lock_t *);\n" |
| 25639 | " extern void __KAI_KMPC_CONVENTION omp_set_lock (omp_lock_t *);\n" |
| 25640 | " extern void __KAI_KMPC_CONVENTION omp_unset_lock (omp_lock_t *);\n" |
| 25641 | " extern void __KAI_KMPC_CONVENTION omp_destroy_lock (omp_lock_t *);\n" |
| 25642 | " extern int __KAI_KMPC_CONVENTION omp_test_lock (omp_lock_t *);\n" |
| 25643 | "\n" |
| 25644 | " /* nested lock API functions */\n" |
| 25645 | " typedef struct omp_nest_lock_t {\n" |
| 25646 | " void * _lk;\n" |
| 25647 | " } omp_nest_lock_t;\n" |
| 25648 | "\n" |
| 25649 | " extern void __KAI_KMPC_CONVENTION omp_init_nest_lock (omp_nest_lock_t *);\n" |
| 25650 | " extern void __KAI_KMPC_CONVENTION omp_set_nest_lock (omp_nest_lock_t *);\n" |
| 25651 | " extern void __KAI_KMPC_CONVENTION omp_unset_nest_lock (omp_nest_lock_t *);\n" |
| 25652 | " extern void __KAI_KMPC_CONVENTION omp_destroy_nest_lock (omp_nest_lock_t *);\n" |
| 25653 | " extern int __KAI_KMPC_CONVENTION omp_test_nest_lock (omp_nest_lock_t *);\n" |
| 25654 | "\n" |
| 25655 | " /* lock hint type for dynamic user lock */\n" |
| 25656 | " typedef enum omp_lock_hint_t {\n" |
| 25657 | " omp_lock_hint_none = 0,\n" |
| 25658 | " omp_lock_hint_uncontended = 1,\n" |
| 25659 | " omp_lock_hint_contended = (1<<1 ),\n" |
| 25660 | " omp_lock_hint_nonspeculative = (1<<2 ),\n" |
| 25661 | " omp_lock_hint_speculative = (1<<3 ),\n" |
| 25662 | " kmp_lock_hint_hle = (1<<16),\n" |
| 25663 | " kmp_lock_hint_rtm = (1<<17),\n" |
| 25664 | " kmp_lock_hint_adaptive = (1<<18)\n" |
| 25665 | " } omp_lock_hint_t;\n" |
| 25666 | "\n" |
| 25667 | " /* hinted lock initializers */\n" |
| 25668 | " extern void __KAI_KMPC_CONVENTION omp_init_lock_with_hint(omp_lock_t *, omp_lock_hint_t);\n" |
| 25669 | " extern void __KAI_KMPC_CONVENTION omp_init_nest_lock_with_hint(omp_nest_lock_t *, omp_lock_hint_t);\n" |
| 25670 | "\n" |
| 25671 | " /* time API functions */\n" |
| 25672 | " extern double __KAI_KMPC_CONVENTION omp_get_wtime (void);\n" |
| 25673 | " extern double __KAI_KMPC_CONVENTION omp_get_wtick (void);\n" |
| 25674 | "\n" |
| 25675 | " /* OpenMP 4.0 */\n" |
| 25676 | " extern int __KAI_KMPC_CONVENTION omp_get_default_device (void);\n" |
| 25677 | " extern void __KAI_KMPC_CONVENTION omp_set_default_device (int);\n" |
| 25678 | " extern int __KAI_KMPC_CONVENTION omp_is_initial_device (void);\n" |
| 25679 | " extern int __KAI_KMPC_CONVENTION omp_get_num_devices (void);\n" |
| 25680 | " extern int __KAI_KMPC_CONVENTION omp_get_num_teams (void);\n" |
| 25681 | " extern int __KAI_KMPC_CONVENTION omp_get_team_num (void);\n" |
| 25682 | " extern int __KAI_KMPC_CONVENTION omp_get_cancellation (void);\n" |
| 25683 | "\n" |
| 25684 | "# include <stdlib.h>\n" |
| 25685 | " /* OpenMP 4.5 */\n" |
| 25686 | " extern int __KAI_KMPC_CONVENTION omp_get_initial_device (void);\n" |
| 25687 | " extern void* __KAI_KMPC_CONVENTION omp_target_alloc(size_t, int);\n" |
| 25688 | " extern void __KAI_KMPC_CONVENTION omp_target_free(void *, int);\n" |
| 25689 | " extern int __KAI_KMPC_CONVENTION omp_target_is_present(void *, int);\n" |
| 25690 | " extern int __KAI_KMPC_CONVENTION omp_target_memcpy(void *, void *, size_t, size_t, size_t, int, int);\n" |
| 25691 | " extern int __KAI_KMPC_CONVENTION omp_target_memcpy_rect(void *, void *, size_t, int, const size_t *,\n" |
| 25692 | " const size_t *, const size_t *, const size_t *, const size_t *, int, int);\n" |
| 25693 | " extern int __KAI_KMPC_CONVENTION omp_target_associate_ptr(void *, void *, size_t, size_t, int);\n" |
| 25694 | " extern int __KAI_KMPC_CONVENTION omp_target_disassociate_ptr(void *, int);\n" |
| 25695 | "\n" |
| 25696 | " /* kmp API functions */\n" |
| 25697 | " extern int __KAI_KMPC_CONVENTION kmp_get_stacksize (void);\n" |
| 25698 | " extern void __KAI_KMPC_CONVENTION kmp_set_stacksize (int);\n" |
| 25699 | " extern size_t __KAI_KMPC_CONVENTION kmp_get_stacksize_s (void);\n" |
| 25700 | " extern void __KAI_KMPC_CONVENTION kmp_set_stacksize_s (size_t);\n" |
| 25701 | " extern int __KAI_KMPC_CONVENTION kmp_get_blocktime (void);\n" |
| 25702 | " extern int __KAI_KMPC_CONVENTION kmp_get_library (void);\n" |
| 25703 | " extern void __KAI_KMPC_CONVENTION kmp_set_blocktime (int);\n" |
| 25704 | " extern void __KAI_KMPC_CONVENTION kmp_set_library (int);\n" |
| 25705 | " extern void __KAI_KMPC_CONVENTION kmp_set_library_serial (void);\n" |
| 25706 | " extern void __KAI_KMPC_CONVENTION kmp_set_library_turnaround (void);\n" |
| 25707 | " extern void __KAI_KMPC_CONVENTION kmp_set_library_throughput (void);\n" |
| 25708 | " extern void __KAI_KMPC_CONVENTION kmp_set_defaults (char const *);\n" |
| 25709 | " extern void __KAI_KMPC_CONVENTION kmp_set_disp_num_buffers (int);\n" |
| 25710 | "\n" |
| 25711 | " /* Intel affinity API */\n" |
| 25712 | " typedef void * kmp_affinity_mask_t;\n" |
| 25713 | "\n" |
| 25714 | " extern int __KAI_KMPC_CONVENTION kmp_set_affinity (kmp_affinity_mask_t *);\n" |
| 25715 | " extern int __KAI_KMPC_CONVENTION kmp_get_affinity (kmp_affinity_mask_t *);\n" |
| 25716 | " extern int __KAI_KMPC_CONVENTION kmp_get_affinity_max_proc (void);\n" |
| 25717 | " extern void __KAI_KMPC_CONVENTION kmp_create_affinity_mask (kmp_affinity_mask_t *);\n" |
| 25718 | " extern void __KAI_KMPC_CONVENTION kmp_destroy_affinity_mask (kmp_affinity_mask_t *);\n" |
| 25719 | " extern int __KAI_KMPC_CONVENTION kmp_set_affinity_mask_proc (int, kmp_affinity_mask_t *);\n" |
| 25720 | " extern int __KAI_KMPC_CONVENTION kmp_unset_affinity_mask_proc (int, kmp_affinity_mask_t *);\n" |
| 25721 | " extern int __KAI_KMPC_CONVENTION kmp_get_affinity_mask_proc (int, kmp_affinity_mask_t *);\n" |
| 25722 | "\n" |
| 25723 | " /* OpenMP 4.0 affinity API */\n" |
| 25724 | " typedef enum omp_proc_bind_t {\n" |
| 25725 | " omp_proc_bind_false = 0,\n" |
| 25726 | " omp_proc_bind_true = 1,\n" |
| 25727 | " omp_proc_bind_master = 2,\n" |
| 25728 | " omp_proc_bind_close = 3,\n" |
| 25729 | " omp_proc_bind_spread = 4\n" |
| 25730 | " } omp_proc_bind_t;\n" |
| 25731 | "\n" |
| 25732 | " extern omp_proc_bind_t __KAI_KMPC_CONVENTION omp_get_proc_bind (void);\n" |
| 25733 | "\n" |
| 25734 | " /* OpenMP 4.5 affinity API */\n" |
| 25735 | " extern int __KAI_KMPC_CONVENTION omp_get_num_places (void);\n" |
| 25736 | " extern int __KAI_KMPC_CONVENTION omp_get_place_num_procs (int);\n" |
| 25737 | " extern void __KAI_KMPC_CONVENTION omp_get_place_proc_ids (int, int *);\n" |
| 25738 | " extern int __KAI_KMPC_CONVENTION omp_get_place_num (void);\n" |
| 25739 | " extern int __KAI_KMPC_CONVENTION omp_get_partition_num_places (void);\n" |
| 25740 | " extern void __KAI_KMPC_CONVENTION omp_get_partition_place_nums (int *);\n" |
| 25741 | "\n" |
| 25742 | " extern void * __KAI_KMPC_CONVENTION kmp_malloc (size_t);\n" |
| 25743 | " extern void * __KAI_KMPC_CONVENTION kmp_aligned_malloc (size_t, size_t);\n" |
| 25744 | " extern void * __KAI_KMPC_CONVENTION kmp_calloc (size_t, size_t);\n" |
| 25745 | " extern void * __KAI_KMPC_CONVENTION kmp_realloc (void *, size_t);\n" |
| 25746 | " extern void __KAI_KMPC_CONVENTION kmp_free (void *);\n" |
| 25747 | "\n" |
| 25748 | " extern void __KAI_KMPC_CONVENTION kmp_set_warnings_on(void);\n" |
| 25749 | " extern void __KAI_KMPC_CONVENTION kmp_set_warnings_off(void);\n" |
| 25750 | "\n" |
| 25751 | " /* OpenMP 5.0 Tool Control */\n" |
| 25752 | " typedef enum omp_control_tool_result_t {\n" |
| 25753 | " omp_control_tool_notool = -2,\n" |
| 25754 | " omp_control_tool_nocallback = -1,\n" |
| 25755 | " omp_control_tool_success = 0,\n" |
| 25756 | " omp_control_tool_ignored = 1\n" |
| 25757 | " } omp_control_tool_result_t;\n" |
| 25758 | "\n" |
| 25759 | " typedef enum omp_control_tool_t {\n" |
| 25760 | " omp_control_tool_start = 1,\n" |
| 25761 | " omp_control_tool_pause = 2,\n" |
| 25762 | " omp_control_tool_flush = 3,\n" |
| 25763 | " omp_control_tool_end = 4\n" |
| 25764 | " } omp_control_tool_t;\n" |
| 25765 | " \n" |
| 25766 | " extern int __KAI_KMPC_CONVENTION omp_control_tool(int, int, void*);\n" |
| 25767 | "\n" |
| 25768 | "# undef __KAI_KMPC_CONVENTION\n" |
| 25769 | "\n" |
| 25770 | " /* Warning:\n" |
| 25771 | " The following typedefs are not standard, deprecated and will be removed in a future release.\n" |
| 25772 | " */\n" |
| 25773 | " typedef int omp_int_t;\n" |
| 25774 | " typedef double omp_wtime_t;\n" |
| 25775 | "\n" |
| 25776 | "# ifdef __cplusplus\n" |
| 25777 | " }\n" |
| 25778 | "# endif\n" |
| 25779 | "\n" |
| 25780 | "#endif /* __OMP_H */\n" |
| 25781 | "\n" |
| 25782 | "" } , |
| 25783 | { "/builtins/ompt.h" , "/*\n" |
| 25784 | " * include/50/ompt.h.var\n" |
| 25785 | " */\n" |
| 25786 | "\n" |
| 25787 | "//===----------------------------------------------------------------------===//\n" |
| 25788 | "//\n" |
| 25789 | "// The LLVM Compiler Infrastructure\n" |
| 25790 | "//\n" |
| 25791 | "// This file is dual licensed under the MIT and the University of Illinois Open\n" |
| 25792 | "// Source Licenses. See LICENSE.txt for details.\n" |
| 25793 | "//\n" |
| 25794 | "//===----------------------------------------------------------------------===//\n" |
| 25795 | "\n" |
| 25796 | "#ifndef __OMPT__\n" |
| 25797 | "#define __OMPT__\n" |
| 25798 | "\n" |
| 25799 | "/*****************************************************************************\n" |
| 25800 | " * system include files\n" |
| 25801 | " *****************************************************************************/\n" |
| 25802 | "\n" |
| 25803 | "#include <stdint.h>\n" |
| 25804 | "#include <stddef.h>\n" |
| 25805 | "\n" |
| 25806 | "\n" |
| 25807 | "\n" |
| 25808 | "/*****************************************************************************\n" |
| 25809 | " * iteration macros\n" |
| 25810 | " *****************************************************************************/\n" |
| 25811 | "\n" |
| 25812 | "#define FOREACH_OMPT_INQUIRY_FN(macro) \\\n" |
| 25813 | " macro (ompt_enumerate_states) \\\n" |
| 25814 | " macro (ompt_enumerate_mutex_impls) \\\n" |
| 25815 | " \\\n" |
| 25816 | " macro (ompt_set_callback) \\\n" |
| 25817 | " macro (ompt_get_callback) \\\n" |
| 25818 | " \\\n" |
| 25819 | " macro (ompt_get_state) \\\n" |
| 25820 | " \\\n" |
| 25821 | " macro (ompt_get_parallel_info) \\\n" |
| 25822 | " macro (ompt_get_task_info) \\\n" |
| 25823 | " macro (ompt_get_thread_data) \\\n" |
| 25824 | " macro (ompt_get_unique_id) \\\n" |
| 25825 | " \\\n" |
| 25826 | " macro(ompt_get_num_procs) \\\n" |
| 25827 | " macro(ompt_get_num_places) \\\n" |
| 25828 | " macro(ompt_get_place_proc_ids) \\\n" |
| 25829 | " macro(ompt_get_place_num) \\\n" |
| 25830 | " macro(ompt_get_partition_place_nums) \\\n" |
| 25831 | " macro(ompt_get_proc_id) \\\n" |
| 25832 | " \\\n" |
| 25833 | " macro(ompt_get_target_info) \\\n" |
| 25834 | " macro(ompt_get_num_devices)\n" |
| 25835 | "\n" |
| 25836 | "#define FOREACH_OMP_STATE(macro) \\\n" |
| 25837 | " \\\n" |
| 25838 | " /* first available state */ \\\n" |
| 25839 | " macro (omp_state_undefined, 0x102) /* undefined thread state */ \\\n" |
| 25840 | " \\\n" |
| 25841 | " /* work states (0..15) */ \\\n" |
| 25842 | " macro (omp_state_work_serial, 0x000) /* working outside parallel */ \\\n" |
| 25843 | " macro (omp_state_work_parallel, 0x001) /* working within parallel */ \\\n" |
| 25844 | " macro (omp_state_work_reduction, 0x002) /* performing a reduction */ \\\n" |
| 25845 | " \\\n" |
| 25846 | " /* barrier wait states (16..31) */ \\\n" |
| 25847 | " macro (omp_state_wait_barrier, 0x010) /* waiting at a barrier */ \\\n" |
| 25848 | " macro (omp_state_wait_barrier_implicit_parallel, 0x011) \\\n" |
| 25849 | " /* implicit barrier at the end of parallel region */\\\n" |
| 25850 | " macro (omp_state_wait_barrier_implicit_workshare, 0x012) \\\n" |
| 25851 | " /* implicit barrier at the end of worksharing */ \\\n" |
| 25852 | " macro (omp_state_wait_barrier_implicit, 0x013) /* implicit barrier */ \\\n" |
| 25853 | " macro (omp_state_wait_barrier_explicit, 0x014) /* explicit barrier */ \\\n" |
| 25854 | " \\\n" |
| 25855 | " /* task wait states (32..63) */ \\\n" |
| 25856 | " macro (omp_state_wait_taskwait, 0x020) /* waiting at a taskwait */ \\\n" |
| 25857 | " macro (omp_state_wait_taskgroup, 0x021) /* waiting at a taskgroup */ \\\n" |
| 25858 | " \\\n" |
| 25859 | " /* mutex wait states (64..127) */ \\\n" |
| 25860 | " macro (omp_state_wait_mutex, 0x040) \\\n" |
| 25861 | " macro (omp_state_wait_lock, 0x041) /* waiting for lock */ \\\n" |
| 25862 | " macro (omp_state_wait_critical, 0x042) /* waiting for critical */ \\\n" |
| 25863 | " macro (omp_state_wait_atomic, 0x043) /* waiting for atomic */ \\\n" |
| 25864 | " macro (omp_state_wait_ordered, 0x044) /* waiting for ordered */ \\\n" |
| 25865 | " \\\n" |
| 25866 | " /* target wait states (128..255) */ \\\n" |
| 25867 | " macro (omp_state_wait_target, 0x080) /* waiting for target region */ \\\n" |
| 25868 | " macro (omp_state_wait_target_map, 0x081) /* waiting for target data mapping operation */ \\\n" |
| 25869 | " macro (omp_state_wait_target_update, 0x082) /* waiting for target update operation */ \\\n" |
| 25870 | " \\\n" |
| 25871 | " /* misc (256..511) */ \\\n" |
| 25872 | " macro (omp_state_idle, 0x100) /* waiting for work */ \\\n" |
| 25873 | " macro (omp_state_overhead, 0x101) /* overhead excluding wait states */ \\\n" |
| 25874 | " \\\n" |
| 25875 | " /* implementation-specific states (512..) */\n" |
| 25876 | "\n" |
| 25877 | "\n" |
| 25878 | "#define FOREACH_KMP_MUTEX_IMPL(macro) \\\n" |
| 25879 | " macro (ompt_mutex_impl_unknown, 0) /* unknown implementation */ \\\n" |
| 25880 | " macro (kmp_mutex_impl_spin, 1) /* based on spin */ \\\n" |
| 25881 | " macro (kmp_mutex_impl_queuing, 2) /* based on some fair policy */ \\\n" |
| 25882 | " macro (kmp_mutex_impl_speculative, 3) /* based on HW-supported speculation */\n" |
| 25883 | "\n" |
| 25884 | "#define FOREACH_OMPT_EVENT(macro) \\\n" |
| 25885 | " \\\n" |
| 25886 | " /*--- Mandatory Events ---*/ \\\n" |
| 25887 | " macro (ompt_callback_thread_begin, ompt_callback_thread_begin_t, 1) /* thread begin */ \\\n" |
| 25888 | " macro (ompt_callback_thread_end, ompt_callback_thread_end_t, 2) /* thread end */ \\\n" |
| 25889 | " \\\n" |
| 25890 | " macro (ompt_callback_parallel_begin, ompt_callback_parallel_begin_t, 3) /* parallel begin */ \\\n" |
| 25891 | " macro (ompt_callback_parallel_end, ompt_callback_parallel_end_t, 4) /* parallel end */ \\\n" |
| 25892 | " \\\n" |
| 25893 | " macro (ompt_callback_task_create, ompt_callback_task_create_t, 5) /* task begin */ \\\n" |
| 25894 | " macro (ompt_callback_task_schedule, ompt_callback_task_schedule_t, 6) /* task schedule */ \\\n" |
| 25895 | " macro (ompt_callback_implicit_task, ompt_callback_implicit_task_t, 7) /* implicit task */ \\\n" |
| 25896 | " \\\n" |
| 25897 | " macro (ompt_callback_target, ompt_callback_target_t, 8) /* target */ \\\n" |
| 25898 | " macro (ompt_callback_target_data_op, ompt_callback_target_data_op_t, 9) /* target data op */ \\\n" |
| 25899 | " macro (ompt_callback_target_submit, ompt_callback_target_submit_t, 10) /* target submit */ \\\n" |
| 25900 | " \\\n" |
| 25901 | " macro (ompt_callback_control_tool, ompt_callback_control_tool_t, 11) /* control tool */ \\\n" |
| 25902 | " \\\n" |
| 25903 | " macro (ompt_callback_device_initialize, ompt_callback_device_initialize_t, 12) /* device initialize */ \\\n" |
| 25904 | " macro (ompt_callback_device_finalize, ompt_callback_device_finalize_t, 13) /* device finalize */ \\\n" |
| 25905 | " \\\n" |
| 25906 | " macro (ompt_callback_device_load, ompt_callback_device_load_t, 14) /* device load */ \\\n" |
| 25907 | " macro (ompt_callback_device_unload, ompt_callback_device_unload_t, 15) /* device unload */ \\\n" |
| 25908 | " \\\n" |
| 25909 | " /* Optional Events */ \\\n" |
| 25910 | " macro (ompt_callback_sync_region_wait, ompt_callback_sync_region_t, 16) /* sync region wait begin or end */ \\\n" |
| 25911 | " \\\n" |
| 25912 | " macro (ompt_callback_mutex_released, ompt_callback_mutex_t, 17) /* mutex released */ \\\n" |
| 25913 | " \\\n" |
| 25914 | " macro (ompt_callback_task_dependences, ompt_callback_task_dependences_t, 18) /* report task dependences */ \\\n" |
| 25915 | " macro (ompt_callback_task_dependence, ompt_callback_task_dependence_t, 19) /* report task dependence */ \\\n" |
| 25916 | " \\\n" |
| 25917 | " macro (ompt_callback_work, ompt_callback_work_t, 20) /* task at work begin or end */ \\\n" |
| 25918 | " \\\n" |
| 25919 | " macro (ompt_callback_master, ompt_callback_master_t, 21) /* task at master begin or end */ \\\n" |
| 25920 | " \\\n" |
| 25921 | " macro (ompt_callback_target_map, ompt_callback_target_map_t, 22) /* target map */ \\\n" |
| 25922 | " \\\n" |
| 25923 | " macro (ompt_callback_sync_region, ompt_callback_sync_region_t, 23) /* sync region begin or end */ \\\n" |
| 25924 | " \\\n" |
| 25925 | " macro (ompt_callback_lock_init, ompt_callback_mutex_acquire_t, 24) /* lock init */ \\\n" |
| 25926 | " macro (ompt_callback_lock_destroy, ompt_callback_mutex_t, 25) /* lock destroy */ \\\n" |
| 25927 | " \\\n" |
| 25928 | " macro (ompt_callback_mutex_acquire, ompt_callback_mutex_acquire_t, 26) /* mutex acquire */ \\\n" |
| 25929 | " macro (ompt_callback_mutex_acquired, ompt_callback_mutex_t, 27) /* mutex acquired */ \\\n" |
| 25930 | " \\\n" |
| 25931 | " macro (ompt_callback_nest_lock, ompt_callback_nest_lock_t, 28) /* nest lock */ \\\n" |
| 25932 | " \\\n" |
| 25933 | " macro (ompt_callback_flush, ompt_callback_flush_t, 29) /* after executing flush */ \\\n" |
| 25934 | " \\\n" |
| 25935 | " macro (ompt_callback_cancel, ompt_callback_cancel_t, 30) /* cancel innermost binding region */ \\\n" |
| 25936 | " macro (ompt_callback_idle, ompt_callback_idle_t, 31) /* begin or end idle state */\n" |
| 25937 | "\n" |
| 25938 | "\n" |
| 25939 | "\n" |
| 25940 | "/*****************************************************************************\n" |
| 25941 | " * data types\n" |
| 25942 | " *****************************************************************************/\n" |
| 25943 | "\n" |
| 25944 | "/*---------------------\n" |
| 25945 | " * identifiers\n" |
| 25946 | " *---------------------*/\n" |
| 25947 | "\n" |
| 25948 | "typedef uint64_t ompt_id_t;\n" |
| 25949 | "#define ompt_id_none 0\n" |
| 25950 | "\n" |
| 25951 | "typedef union ompt_data_t {\n" |
| 25952 | " uint64_t value; /* data initialized by runtime to unique id */\n" |
| 25953 | " void *ptr; /* pointer under tool control */\n" |
| 25954 | "} ompt_data_t;\n" |
| 25955 | "\n" |
| 25956 | "static const ompt_data_t ompt_data_none = {0};\n" |
| 25957 | "\n" |
| 25958 | "typedef uint64_t omp_wait_id_t;\n" |
| 25959 | "static const omp_wait_id_t omp_wait_id_none = 0;\n" |
| 25960 | "\n" |
| 25961 | "typedef void ompt_device_t;\n" |
| 25962 | "\n" |
| 25963 | "/*---------------------\n" |
| 25964 | " * omp_frame_t\n" |
| 25965 | " *---------------------*/\n" |
| 25966 | "\n" |
| 25967 | "typedef struct omp_frame_t {\n" |
| 25968 | " void *exit_frame; /* next frame is user code */\n" |
| 25969 | " void *enter_frame; /* previous frame is user code */\n" |
| 25970 | "} omp_frame_t;\n" |
| 25971 | "\n" |
| 25972 | "\n" |
| 25973 | "/*---------------------\n" |
| 25974 | " * dependences types\n" |
| 25975 | " *---------------------*/\n" |
| 25976 | "\n" |
| 25977 | "typedef enum ompt_task_dependence_flag_t {\n" |
| 25978 | " // a two bit field for the dependence type\n" |
| 25979 | " ompt_task_dependence_type_out = 1,\n" |
| 25980 | " ompt_task_dependence_type_in = 2,\n" |
| 25981 | " ompt_task_dependence_type_inout = 3,\n" |
| 25982 | "} ompt_task_dependence_flag_t;\n" |
| 25983 | "\n" |
| 25984 | "typedef struct ompt_task_dependence_t {\n" |
| 25985 | " void *variable_addr;\n" |
| 25986 | " unsigned int dependence_flags;\n" |
| 25987 | "} ompt_task_dependence_t;\n" |
| 25988 | "\n" |
| 25989 | "\n" |
| 25990 | "/*****************************************************************************\n" |
| 25991 | " * enumerations for thread states and runtime events\n" |
| 25992 | " *****************************************************************************/\n" |
| 25993 | "\n" |
| 25994 | "/*---------------------\n" |
| 25995 | " * runtime states\n" |
| 25996 | " *---------------------*/\n" |
| 25997 | "\n" |
| 25998 | "typedef enum {\n" |
| 25999 | "#define omp_state_macro(state, code) state = code,\n" |
| 26000 | " FOREACH_OMP_STATE(omp_state_macro)\n" |
| 26001 | "#undef omp_state_macro\n" |
| 26002 | "} omp_state_t;\n" |
| 26003 | "\n" |
| 26004 | "\n" |
| 26005 | "/*---------------------\n" |
| 26006 | " * runtime events\n" |
| 26007 | " *---------------------*/\n" |
| 26008 | "\n" |
| 26009 | "typedef enum ompt_callbacks_e{\n" |
| 26010 | "#define ompt_event_macro(event, callback, eventid) event = eventid,\n" |
| 26011 | " FOREACH_OMPT_EVENT(ompt_event_macro)\n" |
| 26012 | "#undef ompt_event_macro\n" |
| 26013 | "} ompt_callbacks_t;\n" |
| 26014 | "\n" |
| 26015 | "\n" |
| 26016 | "/*---------------------\n" |
| 26017 | " * set callback results\n" |
| 26018 | " *---------------------*/\n" |
| 26019 | "typedef enum ompt_set_result_t {\n" |
| 26020 | " ompt_set_error = 0,\n" |
| 26021 | " ompt_set_never = 1,\n" |
| 26022 | " ompt_set_sometimes = 2,\n" |
| 26023 | " ompt_set_sometimes_paired = 3,\n" |
| 26024 | " ompt_set_always = 4\n" |
| 26025 | "} ompt_set_result_t;\n" |
| 26026 | "\n" |
| 26027 | "\n" |
| 26028 | "/*----------------------\n" |
| 26029 | " * mutex implementations\n" |
| 26030 | " *----------------------*/\n" |
| 26031 | "typedef enum kmp_mutex_impl_t {\n" |
| 26032 | "#define kmp_mutex_impl_macro(impl, code) impl = code,\n" |
| 26033 | " FOREACH_KMP_MUTEX_IMPL(kmp_mutex_impl_macro)\n" |
| 26034 | "#undef kmp_mutex_impl_macro\n" |
| 26035 | "} kmp_mutex_impl_t;\n" |
| 26036 | "\n" |
| 26037 | "\n" |
| 26038 | "/*****************************************************************************\n" |
| 26039 | " * callback signatures\n" |
| 26040 | " *****************************************************************************/\n" |
| 26041 | "\n" |
| 26042 | "/* initialization */\n" |
| 26043 | "typedef void (*ompt_interface_fn_t)(void);\n" |
| 26044 | "\n" |
| 26045 | "typedef ompt_interface_fn_t (*ompt_function_lookup_t)(\n" |
| 26046 | " const char * /* entry point to look up */\n" |
| 26047 | ");\n" |
| 26048 | "\n" |
| 26049 | "/* threads */\n" |
| 26050 | "typedef enum ompt_thread_type_t {\n" |
| 26051 | " ompt_thread_initial = 1, // start the enumeration at 1\n" |
| 26052 | " ompt_thread_worker = 2,\n" |
| 26053 | " ompt_thread_other = 3,\n" |
| 26054 | " ompt_thread_unknown = 4\n" |
| 26055 | "} ompt_thread_type_t;\n" |
| 26056 | "\n" |
| 26057 | "typedef enum ompt_invoker_t {\n" |
| 26058 | " ompt_invoker_program = 1, /* program invokes master task */\n" |
| 26059 | " ompt_invoker_runtime = 2 /* runtime invokes master task */\n" |
| 26060 | "} ompt_invoker_t;\n" |
| 26061 | "\n" |
| 26062 | "typedef void (*ompt_callback_thread_begin_t) (\n" |
| 26063 | " ompt_thread_type_t thread_type, /* type of thread */\n" |
| 26064 | " ompt_data_t *thread_data /* data of thread */\n" |
| 26065 | ");\n" |
| 26066 | "\n" |
| 26067 | "typedef void (*ompt_callback_thread_end_t) (\n" |
| 26068 | " ompt_data_t *thread_data /* data of thread */\n" |
| 26069 | ");\n" |
| 26070 | "\n" |
| 26071 | "typedef void (*ompt_wait_callback_t) (\n" |
| 26072 | " omp_wait_id_t wait_id /* wait data */\n" |
| 26073 | ");\n" |
| 26074 | "\n" |
| 26075 | "/* parallel and workshares */\n" |
| 26076 | "typedef enum ompt_scope_endpoint_t {\n" |
| 26077 | " ompt_scope_begin = 1,\n" |
| 26078 | " ompt_scope_end = 2\n" |
| 26079 | "} ompt_scope_endpoint_t;\n" |
| 26080 | "\n" |
| 26081 | "\n" |
| 26082 | "/* implicit task */\n" |
| 26083 | "typedef void (*ompt_callback_implicit_task_t) (\n" |
| 26084 | " ompt_scope_endpoint_t endpoint, /* endpoint of implicit task */\n" |
| 26085 | " ompt_data_t *parallel_data, /* data of parallel region */\n" |
| 26086 | " ompt_data_t *task_data, /* data of implicit task */\n" |
| 26087 | " unsigned int team_size, /* team size */\n" |
| 26088 | " unsigned int thread_num /* thread number of calling thread */\n" |
| 26089 | ");\n" |
| 26090 | "\n" |
| 26091 | "typedef void (*ompt_callback_parallel_begin_t) (\n" |
| 26092 | " ompt_data_t *encountering_task_data, /* data of encountering task */\n" |
| 26093 | " const omp_frame_t *encountering_task_frame, /* frame data of encountering task */\n" |
| 26094 | " ompt_data_t *parallel_data, /* data of parallel region */\n" |
| 26095 | " unsigned int requested_team_size, /* requested number of threads in team */\n" |
| 26096 | " ompt_invoker_t invoker, /* invoker of master task */\n" |
| 26097 | " const void *codeptr_ra /* return address of runtime call */\n" |
| 26098 | ");\n" |
| 26099 | "\n" |
| 26100 | "typedef void (*ompt_callback_parallel_end_t) (\n" |
| 26101 | " ompt_data_t *parallel_data, /* data of parallel region */\n" |
| 26102 | " ompt_data_t *encountering_task_data, /* data of encountering task */\n" |
| 26103 | " ompt_invoker_t invoker, /* invoker of master task */ \n" |
| 26104 | " const void *codeptr_ra /* return address of runtime call */\n" |
| 26105 | ");\n" |
| 26106 | "\n" |
| 26107 | "/* tasks */\n" |
| 26108 | "typedef enum ompt_task_type_t {\n" |
| 26109 | " ompt_task_initial = 0x1,\n" |
| 26110 | " ompt_task_implicit = 0x2,\n" |
| 26111 | " ompt_task_explicit = 0x4,\n" |
| 26112 | " ompt_task_target = 0x8,\n" |
| 26113 | " ompt_task_undeferred = 0x8000000,\n" |
| 26114 | " ompt_task_untied = 0x10000000,\n" |
| 26115 | " ompt_task_final = 0x20000000,\n" |
| 26116 | " ompt_task_mergeable = 0x40000000,\n" |
| 26117 | " ompt_task_merged = 0x80000000\n" |
| 26118 | "} ompt_task_type_t;\n" |
| 26119 | "\n" |
| 26120 | "typedef enum ompt_task_status_t {\n" |
| 26121 | " ompt_task_complete = 1,\n" |
| 26122 | " ompt_task_yield = 2,\n" |
| 26123 | " ompt_task_cancel = 3,\n" |
| 26124 | " ompt_task_others = 4\n" |
| 26125 | "} ompt_task_status_t;\n" |
| 26126 | "\n" |
| 26127 | "typedef void (*ompt_callback_task_schedule_t) (\n" |
| 26128 | " ompt_data_t *prior_task_data, /* data of prior task */\n" |
| 26129 | " ompt_task_status_t prior_task_status, /* status of prior task */\n" |
| 26130 | " ompt_data_t *next_task_data /* data of next task */\n" |
| 26131 | ");\n" |
| 26132 | "\n" |
| 26133 | "typedef void (*ompt_callback_task_create_t) (\n" |
| 26134 | " ompt_data_t *encountering_task_data, /* data of parent task */\n" |
| 26135 | " const omp_frame_t *encountering_task_frame, /* frame data for parent task */\n" |
| 26136 | " ompt_data_t *new_task_data, /* data of created task */\n" |
| 26137 | " int type, /* type of created task */\n" |
| 26138 | " int has_dependences, /* created task has dependences */\n" |
| 26139 | " const void *codeptr_ra /* return address of runtime call */\n" |
| 26140 | ");\n" |
| 26141 | "\n" |
| 26142 | "/* task dependences */\n" |
| 26143 | "typedef void (*ompt_callback_task_dependences_t) (\n" |
| 26144 | " ompt_data_t *task_data, /* data of task */\n" |
| 26145 | " const ompt_task_dependence_t *deps, /* dependences of task */\n" |
| 26146 | " int ndeps /* dependences count of task */\n" |
| 26147 | ");\n" |
| 26148 | "\n" |
| 26149 | "typedef void (*ompt_callback_task_dependence_t) (\n" |
| 26150 | " ompt_data_t *src_task_data, /* data of source task */\n" |
| 26151 | " ompt_data_t *sink_task_data /* data of sink task */\n" |
| 26152 | ");\n" |
| 26153 | "\n" |
| 26154 | "/* target and device */\n" |
| 26155 | "typedef enum ompt_target_type_t {\n" |
| 26156 | " ompt_target = 1,\n" |
| 26157 | " ompt_target_enter_data = 2,\n" |
| 26158 | " ompt_target_exit_data = 3,\n" |
| 26159 | " ompt_target_update = 4\n" |
| 26160 | "} ompt_target_type_t;\n" |
| 26161 | "\n" |
| 26162 | "typedef void (*ompt_callback_target_t) (\n" |
| 26163 | " ompt_target_type_t kind,\n" |
| 26164 | " ompt_scope_endpoint_t endpoint,\n" |
| 26165 | " uint64_t device_num,\n" |
| 26166 | " ompt_data_t *task_data,\n" |
| 26167 | " ompt_id_t target_id,\n" |
| 26168 | " const void *codeptr_ra\n" |
| 26169 | ");\n" |
| 26170 | "\n" |
| 26171 | "typedef enum ompt_target_data_op_t {\n" |
| 26172 | " ompt_target_data_alloc = 1,\n" |
| 26173 | " ompt_target_data_transfer_to_dev = 2,\n" |
| 26174 | " ompt_target_data_transfer_from_dev = 3,\n" |
| 26175 | " ompt_target_data_delete = 4\n" |
| 26176 | "} ompt_target_data_op_t;\n" |
| 26177 | "\n" |
| 26178 | "typedef void (*ompt_callback_target_data_op_t) (\n" |
| 26179 | " ompt_id_t target_id,\n" |
| 26180 | " ompt_id_t host_op_id,\n" |
| 26181 | " ompt_target_data_op_t optype,\n" |
| 26182 | " void *host_addr,\n" |
| 26183 | " void *device_addr,\n" |
| 26184 | " size_t bytes\n" |
| 26185 | ");\n" |
| 26186 | "\n" |
| 26187 | "typedef void (*ompt_callback_target_submit_t) (\n" |
| 26188 | " ompt_id_t target_id,\n" |
| 26189 | " ompt_id_t host_op_id\n" |
| 26190 | ");\n" |
| 26191 | "\n" |
| 26192 | "typedef void (*ompt_callback_target_map_t) (\n" |
| 26193 | " ompt_id_t target_id,\n" |
| 26194 | " unsigned int nitems,\n" |
| 26195 | " void **host_addr,\n" |
| 26196 | " void **device_addr,\n" |
| 26197 | " size_t *bytes,\n" |
| 26198 | " unsigned int *mapping_flags\n" |
| 26199 | ");\n" |
| 26200 | "\n" |
| 26201 | "typedef void (*ompt_callback_device_initialize_t) (\n" |
| 26202 | " uint64_t device_num,\n" |
| 26203 | " const char *type,\n" |
| 26204 | " ompt_device_t *device,\n" |
| 26205 | " ompt_function_lookup_t lookup,\n" |
| 26206 | " const char *documentation\n" |
| 26207 | ");\n" |
| 26208 | "\n" |
| 26209 | "typedef void (*ompt_callback_device_finalize_t) (\n" |
| 26210 | " uint64_t device_num\n" |
| 26211 | ");\n" |
| 26212 | "\n" |
| 26213 | "typedef void (*ompt_callback_device_load_t) (\n" |
| 26214 | " uint64_t device_num,\n" |
| 26215 | " const char * filename,\n" |
| 26216 | " int64_t offset_in_file,\n" |
| 26217 | " void * vma_in_file,\n" |
| 26218 | " size_t bytes,\n" |
| 26219 | " void * host_addr,\n" |
| 26220 | " void * device_addr,\n" |
| 26221 | " uint64_t module_id\n" |
| 26222 | ");\n" |
| 26223 | "\n" |
| 26224 | "#define ompt_addr_unknown ((void *) ~0)\n" |
| 26225 | "\n" |
| 26226 | "typedef void (*ompt_callback_device_unload_t) (\n" |
| 26227 | " uint64_t device_num,\n" |
| 26228 | " uint64_t module_id\n" |
| 26229 | ");\n" |
| 26230 | "\n" |
| 26231 | "/* control_tool */\n" |
| 26232 | "typedef int (*ompt_callback_control_tool_t) (\n" |
| 26233 | " uint64_t command, /* command of control call */\n" |
| 26234 | " uint64_t modifier, /* modifier of control call */\n" |
| 26235 | " void *arg, /* argument of control call */\n" |
| 26236 | " const void *codeptr_ra /* return address of runtime call */\n" |
| 26237 | ");\n" |
| 26238 | "\n" |
| 26239 | "typedef enum ompt_mutex_kind_t {\n" |
| 26240 | " ompt_mutex = 0x10,\n" |
| 26241 | " ompt_mutex_lock = 0x11,\n" |
| 26242 | " ompt_mutex_nest_lock = 0x12,\n" |
| 26243 | " ompt_mutex_critical = 0x13,\n" |
| 26244 | " ompt_mutex_atomic = 0x14,\n" |
| 26245 | " ompt_mutex_ordered = 0x20\n" |
| 26246 | "} ompt_mutex_kind_t;\n" |
| 26247 | "\n" |
| 26248 | "typedef void (*ompt_callback_mutex_acquire_t) (\n" |
| 26249 | " ompt_mutex_kind_t kind, /* mutex kind */\n" |
| 26250 | " unsigned int hint, /* mutex hint */\n" |
| 26251 | " unsigned int impl, /* mutex implementation */\n" |
| 26252 | " omp_wait_id_t wait_id, /* id of object being awaited */\n" |
| 26253 | " const void *codeptr_ra /* return address of runtime call */\n" |
| 26254 | ");\n" |
| 26255 | "\n" |
| 26256 | "typedef void (*ompt_callback_mutex_t) (\n" |
| 26257 | " ompt_mutex_kind_t kind, /* mutex kind */\n" |
| 26258 | " omp_wait_id_t wait_id, /* id of object being awaited */\n" |
| 26259 | " const void *codeptr_ra /* return address of runtime call */\n" |
| 26260 | ");\n" |
| 26261 | "\n" |
| 26262 | "typedef void (*ompt_callback_nest_lock_t) (\n" |
| 26263 | " ompt_scope_endpoint_t endpoint, /* endpoint of nested lock */\n" |
| 26264 | " omp_wait_id_t wait_id, /* id of object being awaited */\n" |
| 26265 | " const void *codeptr_ra /* return address of runtime call */\n" |
| 26266 | ");\n" |
| 26267 | "\n" |
| 26268 | "typedef void (*ompt_callback_master_t) (\n" |
| 26269 | " ompt_scope_endpoint_t endpoint, /* endpoint of master region */\n" |
| 26270 | " ompt_data_t *parallel_data, /* data of parallel region */\n" |
| 26271 | " ompt_data_t *task_data, /* data of task */\n" |
| 26272 | " const void *codeptr_ra /* return address of runtime call */\n" |
| 26273 | ");\n" |
| 26274 | "\n" |
| 26275 | "typedef void (*ompt_callback_idle_t) (\n" |
| 26276 | " ompt_scope_endpoint_t endpoint /* endpoint of idle time */\n" |
| 26277 | ");\n" |
| 26278 | "\n" |
| 26279 | "typedef enum ompt_work_type_t {\n" |
| 26280 | " ompt_work_loop = 1,\n" |
| 26281 | " ompt_work_sections = 2,\n" |
| 26282 | " ompt_work_single_executor = 3,\n" |
| 26283 | " ompt_work_single_other = 4,\n" |
| 26284 | " ompt_work_workshare = 5,\n" |
| 26285 | " ompt_work_distribute = 6,\n" |
| 26286 | " ompt_work_taskloop = 7\n" |
| 26287 | "} ompt_work_type_t;\n" |
| 26288 | "\n" |
| 26289 | "typedef void (*ompt_callback_work_t) (\n" |
| 26290 | " ompt_work_type_t wstype, /* type of work region */\n" |
| 26291 | " ompt_scope_endpoint_t endpoint, /* endpoint of work region */\n" |
| 26292 | " ompt_data_t *parallel_data, /* data of parallel region */\n" |
| 26293 | " ompt_data_t *task_data, /* data of task */\n" |
| 26294 | " uint64_t count, /* quantity of work */\n" |
| 26295 | " const void *codeptr_ra /* return address of runtime call */\n" |
| 26296 | ");\n" |
| 26297 | "\n" |
| 26298 | "typedef enum ompt_sync_region_kind_t {\n" |
| 26299 | " ompt_sync_region_barrier = 1,\n" |
| 26300 | " ompt_sync_region_taskwait = 2,\n" |
| 26301 | " ompt_sync_region_taskgroup = 3\n" |
| 26302 | "} ompt_sync_region_kind_t;\n" |
| 26303 | "\n" |
| 26304 | "typedef void (*ompt_callback_sync_region_t) (\n" |
| 26305 | " ompt_sync_region_kind_t kind, /* kind of sync region */\n" |
| 26306 | " ompt_scope_endpoint_t endpoint, /* endpoint of sync region */\n" |
| 26307 | " ompt_data_t *parallel_data, /* data of parallel region */\n" |
| 26308 | " ompt_data_t *task_data, /* data of task */\n" |
| 26309 | " const void *codeptr_ra /* return address of runtime call */\n" |
| 26310 | ");\n" |
| 26311 | "\n" |
| 26312 | "typedef enum ompt_cancel_flag_t {\n" |
| 26313 | " ompt_cancel_parallel = 0x1,\n" |
| 26314 | " ompt_cancel_sections = 0x2,\n" |
| 26315 | " ompt_cancel_do = 0x4,\n" |
| 26316 | " ompt_cancel_taskgroup = 0x8,\n" |
| 26317 | " ompt_cancel_activated = 0x10,\n" |
| 26318 | " ompt_cancel_detected = 0x20,\n" |
| 26319 | " ompt_cancel_discarded_task = 0x40\n" |
| 26320 | "} ompt_cancel_flag_t;\n" |
| 26321 | "\n" |
| 26322 | "typedef void (*ompt_callback_cancel_t) (\n" |
| 26323 | " ompt_data_t *task_data, /* data of task */\n" |
| 26324 | " int flags, /* cancel flags */\n" |
| 26325 | " const void *codeptr_ra /* return address of runtime call */\n" |
| 26326 | ");\n" |
| 26327 | "\n" |
| 26328 | "typedef void (*ompt_callback_flush_t) (\n" |
| 26329 | " ompt_data_t *thread_data, /* data of thread */\n" |
| 26330 | " const void *codeptr_ra /* return address of runtime call */\n" |
| 26331 | ");\n" |
| 26332 | "\n" |
| 26333 | "/****************************************************************************\n" |
| 26334 | " * ompt API\n" |
| 26335 | " ***************************************************************************/\n" |
| 26336 | "\n" |
| 26337 | "#ifdef __cplusplus\n" |
| 26338 | "extern \"C\" {\n" |
| 26339 | "#endif\n" |
| 26340 | "\n" |
| 26341 | "#define OMPT_API_FNTYPE(fn) fn##_t\n" |
| 26342 | "\n" |
| 26343 | "#define OMPT_API_FUNCTION(return_type, fn, args) \\\n" |
| 26344 | " typedef return_type (*OMPT_API_FNTYPE(fn)) args\n" |
| 26345 | "\n" |
| 26346 | "\n" |
| 26347 | "\n" |
| 26348 | "/****************************************************************************\n" |
| 26349 | " * INQUIRY FUNCTIONS\n" |
| 26350 | " ***************************************************************************/\n" |
| 26351 | "\n" |
| 26352 | "/* state */\n" |
| 26353 | "OMPT_API_FUNCTION(omp_state_t, ompt_get_state, (\n" |
| 26354 | " omp_wait_id_t *wait_id\n" |
| 26355 | "));\n" |
| 26356 | "\n" |
| 26357 | "/* thread */\n" |
| 26358 | "OMPT_API_FUNCTION(ompt_data_t*, ompt_get_thread_data, (void));\n" |
| 26359 | "\n" |
| 26360 | "/* parallel region */\n" |
| 26361 | "OMPT_API_FUNCTION(int, ompt_get_parallel_info, (\n" |
| 26362 | " int ancestor_level,\n" |
| 26363 | " ompt_data_t **parallel_data,\n" |
| 26364 | " int *team_size\n" |
| 26365 | "));\n" |
| 26366 | "\n" |
| 26367 | "/* task */\n" |
| 26368 | "OMPT_API_FUNCTION(int, ompt_get_task_info, (\n" |
| 26369 | " int ancestor_level,\n" |
| 26370 | " int *type,\n" |
| 26371 | " ompt_data_t **task_data,\n" |
| 26372 | " omp_frame_t **task_frame,\n" |
| 26373 | " ompt_data_t **parallel_data,\n" |
| 26374 | " int *thread_num\n" |
| 26375 | "));\n" |
| 26376 | "\n" |
| 26377 | "/* procs */\n" |
| 26378 | "OMPT_API_FUNCTION(int, ompt_get_num_procs, (void));\n" |
| 26379 | "\n" |
| 26380 | "/* places */\n" |
| 26381 | "OMPT_API_FUNCTION(int, ompt_get_num_places, (void));\n" |
| 26382 | "\n" |
| 26383 | "OMPT_API_FUNCTION(int, ompt_get_place_proc_ids, (\n" |
| 26384 | " int place_num,\n" |
| 26385 | " int ids_size,\n" |
| 26386 | " int *ids\n" |
| 26387 | "));\n" |
| 26388 | "\n" |
| 26389 | "OMPT_API_FUNCTION(int, ompt_get_place_num, (void));\n" |
| 26390 | "\n" |
| 26391 | "OMPT_API_FUNCTION(int, ompt_get_partition_place_nums, (\n" |
| 26392 | " int place_nums_size,\n" |
| 26393 | " int *place_nums\n" |
| 26394 | "));\n" |
| 26395 | "\n" |
| 26396 | "/* proc_id */\n" |
| 26397 | "OMPT_API_FUNCTION(int, ompt_get_proc_id, (void));\n" |
| 26398 | "\n" |
| 26399 | "\n" |
| 26400 | "/****************************************************************************\n" |
| 26401 | " * INITIALIZATION FUNCTIONS\n" |
| 26402 | " ***************************************************************************/\n" |
| 26403 | "\n" |
| 26404 | "OMPT_API_FUNCTION(int, ompt_initialize, (\n" |
| 26405 | " ompt_function_lookup_t ompt_fn_lookup,\n" |
| 26406 | " ompt_data_t *tool_data\n" |
| 26407 | "));\n" |
| 26408 | "\n" |
| 26409 | "OMPT_API_FUNCTION(void, ompt_finalize, (\n" |
| 26410 | " ompt_data_t *tool_data\n" |
| 26411 | "));\n" |
| 26412 | "\n" |
| 26413 | "typedef struct ompt_start_tool_result_t {\n" |
| 26414 | " ompt_initialize_t initialize;\n" |
| 26415 | " ompt_finalize_t finalize;\n" |
| 26416 | " ompt_data_t tool_data;\n" |
| 26417 | "} ompt_start_tool_result_t;\n" |
| 26418 | "\n" |
| 26419 | "/* initialization interface to be defined by tool */\n" |
| 26420 | "#ifdef _WIN32\n" |
| 26421 | "__declspec(dllexport)\n" |
| 26422 | "#endif\n" |
| 26423 | "ompt_start_tool_result_t * ompt_start_tool(\n" |
| 26424 | " unsigned int omp_version, \n" |
| 26425 | " const char * runtime_version\n" |
| 26426 | ");\n" |
| 26427 | "\n" |
| 26428 | "typedef void (*ompt_callback_t)(void);\n" |
| 26429 | "\n" |
| 26430 | "OMPT_API_FUNCTION(int, ompt_set_callback, (\n" |
| 26431 | " ompt_callbacks_t which,\n" |
| 26432 | " ompt_callback_t callback\n" |
| 26433 | "));\n" |
| 26434 | "\n" |
| 26435 | "OMPT_API_FUNCTION(int, ompt_get_callback, (\n" |
| 26436 | " ompt_callbacks_t which,\n" |
| 26437 | " ompt_callback_t *callback\n" |
| 26438 | "));\n" |
| 26439 | "\n" |
| 26440 | "\n" |
| 26441 | "\n" |
| 26442 | "/****************************************************************************\n" |
| 26443 | " * MISCELLANEOUS FUNCTIONS\n" |
| 26444 | " ***************************************************************************/\n" |
| 26445 | "\n" |
| 26446 | "/* state enumeration */\n" |
| 26447 | "OMPT_API_FUNCTION(int, ompt_enumerate_states, (\n" |
| 26448 | " int current_state,\n" |
| 26449 | " int *next_state,\n" |
| 26450 | " const char **next_state_name\n" |
| 26451 | "));\n" |
| 26452 | "\n" |
| 26453 | "/* mutex implementation enumeration */\n" |
| 26454 | "OMPT_API_FUNCTION(int, ompt_enumerate_mutex_impls, (\n" |
| 26455 | " int current_impl,\n" |
| 26456 | " int *next_impl,\n" |
| 26457 | " const char **next_impl_name\n" |
| 26458 | "));\n" |
| 26459 | "\n" |
| 26460 | "/* get_unique_id */\n" |
| 26461 | "OMPT_API_FUNCTION(uint64_t, ompt_get_unique_id, (void));\n" |
| 26462 | "\n" |
| 26463 | "#ifdef __cplusplus\n" |
| 26464 | "};\n" |
| 26465 | "#endif\n" |
| 26466 | "\n" |
| 26467 | "/****************************************************************************\n" |
| 26468 | " * TARGET\n" |
| 26469 | " ***************************************************************************/\n" |
| 26470 | "\n" |
| 26471 | " OMPT_API_FUNCTION(int, ompt_get_target_info, (\n" |
| 26472 | " uint64_t *device_num,\n" |
| 26473 | " ompt_id_t *target_id,\n" |
| 26474 | " ompt_id_t *host_op_id\n" |
| 26475 | "));\n" |
| 26476 | "\n" |
| 26477 | " OMPT_API_FUNCTION(int, ompt_get_num_devices, (void));\n" |
| 26478 | "\n" |
| 26479 | "#endif /* __OMPT__ */\n" |
| 26480 | "" } , |
| 26481 | { "/builtins/opencl-c.h" , "//===--- opencl-c.h - OpenCL C language builtin function header -----------===//\n" |
| 26482 | "//\n" |
| 26483 | "// The LLVM Compiler Infrastructure\n" |
| 26484 | "//\n" |
| 26485 | "// This file is distributed under the University of Illinois Open Source\n" |
| 26486 | "// License. See LICENSE.TXT for details.\n" |
| 26487 | "//\n" |
| 26488 | "//===----------------------------------------------------------------------===//\n" |
| 26489 | "\n" |
| 26490 | "#ifndef _OPENCL_H_\n" |
| 26491 | "#define _OPENCL_H_\n" |
| 26492 | "\n" |
| 26493 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 26494 | "#ifndef cl_khr_depth_images\n" |
| 26495 | "#define cl_khr_depth_images\n" |
| 26496 | "#endif //cl_khr_depth_images\n" |
| 26497 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 26498 | "\n" |
| 26499 | "#if __OPENCL_C_VERSION__ < CL_VERSION_2_0\n" |
| 26500 | "#ifdef cl_khr_3d_image_writes\n" |
| 26501 | "#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n" |
| 26502 | "#endif //cl_khr_3d_image_writes\n" |
| 26503 | "#endif //__OPENCL_C_VERSION__ < CL_VERSION_2_0\n" |
| 26504 | "\n" |
| 26505 | "#define __ovld __attribute__((overloadable))\n" |
| 26506 | "#define __conv __attribute__((convergent))\n" |
| 26507 | "\n" |
| 26508 | "// Optimizations\n" |
| 26509 | "#define __purefn __attribute__((pure))\n" |
| 26510 | "#define __cnfn __attribute__((const))\n" |
| 26511 | "\n" |
| 26512 | "// built-in scalar data types:\n" |
| 26513 | "\n" |
| 26514 | "/**\n" |
| 26515 | " * An unsigned 8-bit integer.\n" |
| 26516 | " */\n" |
| 26517 | "typedef unsigned char uchar;\n" |
| 26518 | "\n" |
| 26519 | "/**\n" |
| 26520 | " * An unsigned 16-bit integer.\n" |
| 26521 | " */\n" |
| 26522 | "typedef unsigned short ushort;\n" |
| 26523 | "\n" |
| 26524 | "/**\n" |
| 26525 | " * An unsigned 32-bit integer.\n" |
| 26526 | " */\n" |
| 26527 | "typedef unsigned int uint;\n" |
| 26528 | "\n" |
| 26529 | "/**\n" |
| 26530 | " * An unsigned 64-bit integer.\n" |
| 26531 | " */\n" |
| 26532 | "typedef unsigned long ulong;\n" |
| 26533 | "\n" |
| 26534 | "/**\n" |
| 26535 | " * The unsigned integer type of the result of the sizeof operator. This\n" |
| 26536 | " * is a 32-bit unsigned integer if CL_DEVICE_ADDRESS_BITS\n" |
| 26537 | " * defined in table 4.3 is 32-bits and is a 64-bit unsigned integer if\n" |
| 26538 | " * CL_DEVICE_ADDRESS_BITS is 64-bits.\n" |
| 26539 | " */\n" |
| 26540 | "typedef __SIZE_TYPE__ size_t;\n" |
| 26541 | "\n" |
| 26542 | "/**\n" |
| 26543 | " * A signed integer type that is the result of subtracting two pointers.\n" |
| 26544 | " * This is a 32-bit signed integer if CL_DEVICE_ADDRESS_BITS\n" |
| 26545 | " * defined in table 4.3 is 32-bits and is a 64-bit signed integer if\n" |
| 26546 | " * CL_DEVICE_ADDRESS_BITS is 64-bits.\n" |
| 26547 | " */\n" |
| 26548 | "typedef __PTRDIFF_TYPE__ ptrdiff_t;\n" |
| 26549 | "\n" |
| 26550 | "/**\n" |
| 26551 | "* A signed integer type with the property that any valid pointer to\n" |
| 26552 | "* void can be converted to this type, then converted back to pointer\n" |
| 26553 | "* to void, and the result will compare equal to the original pointer.\n" |
| 26554 | "*/\n" |
| 26555 | "typedef __INTPTR_TYPE__ intptr_t;\n" |
| 26556 | "\n" |
| 26557 | "/**\n" |
| 26558 | "* An unsigned integer type with the property that any valid pointer to\n" |
| 26559 | "* void can be converted to this type, then converted back to pointer\n" |
| 26560 | "* to void, and the result will compare equal to the original pointer.\n" |
| 26561 | "*/\n" |
| 26562 | "typedef __UINTPTR_TYPE__ uintptr_t;\n" |
| 26563 | "\n" |
| 26564 | "// built-in vector data types:\n" |
| 26565 | "typedef char char2 __attribute__((ext_vector_type(2)));\n" |
| 26566 | "typedef char char3 __attribute__((ext_vector_type(3)));\n" |
| 26567 | "typedef char char4 __attribute__((ext_vector_type(4)));\n" |
| 26568 | "typedef char char8 __attribute__((ext_vector_type(8)));\n" |
| 26569 | "typedef char char16 __attribute__((ext_vector_type(16)));\n" |
| 26570 | "typedef uchar uchar2 __attribute__((ext_vector_type(2)));\n" |
| 26571 | "typedef uchar uchar3 __attribute__((ext_vector_type(3)));\n" |
| 26572 | "typedef uchar uchar4 __attribute__((ext_vector_type(4)));\n" |
| 26573 | "typedef uchar uchar8 __attribute__((ext_vector_type(8)));\n" |
| 26574 | "typedef uchar uchar16 __attribute__((ext_vector_type(16)));\n" |
| 26575 | "typedef short short2 __attribute__((ext_vector_type(2)));\n" |
| 26576 | "typedef short short3 __attribute__((ext_vector_type(3)));\n" |
| 26577 | "typedef short short4 __attribute__((ext_vector_type(4)));\n" |
| 26578 | "typedef short short8 __attribute__((ext_vector_type(8)));\n" |
| 26579 | "typedef short short16 __attribute__((ext_vector_type(16)));\n" |
| 26580 | "typedef ushort ushort2 __attribute__((ext_vector_type(2)));\n" |
| 26581 | "typedef ushort ushort3 __attribute__((ext_vector_type(3)));\n" |
| 26582 | "typedef ushort ushort4 __attribute__((ext_vector_type(4)));\n" |
| 26583 | "typedef ushort ushort8 __attribute__((ext_vector_type(8)));\n" |
| 26584 | "typedef ushort ushort16 __attribute__((ext_vector_type(16)));\n" |
| 26585 | "typedef int int2 __attribute__((ext_vector_type(2)));\n" |
| 26586 | "typedef int int3 __attribute__((ext_vector_type(3)));\n" |
| 26587 | "typedef int int4 __attribute__((ext_vector_type(4)));\n" |
| 26588 | "typedef int int8 __attribute__((ext_vector_type(8)));\n" |
| 26589 | "typedef int int16 __attribute__((ext_vector_type(16)));\n" |
| 26590 | "typedef uint uint2 __attribute__((ext_vector_type(2)));\n" |
| 26591 | "typedef uint uint3 __attribute__((ext_vector_type(3)));\n" |
| 26592 | "typedef uint uint4 __attribute__((ext_vector_type(4)));\n" |
| 26593 | "typedef uint uint8 __attribute__((ext_vector_type(8)));\n" |
| 26594 | "typedef uint uint16 __attribute__((ext_vector_type(16)));\n" |
| 26595 | "typedef long long2 __attribute__((ext_vector_type(2)));\n" |
| 26596 | "typedef long long3 __attribute__((ext_vector_type(3)));\n" |
| 26597 | "typedef long long4 __attribute__((ext_vector_type(4)));\n" |
| 26598 | "typedef long long8 __attribute__((ext_vector_type(8)));\n" |
| 26599 | "typedef long long16 __attribute__((ext_vector_type(16)));\n" |
| 26600 | "typedef ulong ulong2 __attribute__((ext_vector_type(2)));\n" |
| 26601 | "typedef ulong ulong3 __attribute__((ext_vector_type(3)));\n" |
| 26602 | "typedef ulong ulong4 __attribute__((ext_vector_type(4)));\n" |
| 26603 | "typedef ulong ulong8 __attribute__((ext_vector_type(8)));\n" |
| 26604 | "typedef ulong ulong16 __attribute__((ext_vector_type(16)));\n" |
| 26605 | "typedef float float2 __attribute__((ext_vector_type(2)));\n" |
| 26606 | "typedef float float3 __attribute__((ext_vector_type(3)));\n" |
| 26607 | "typedef float float4 __attribute__((ext_vector_type(4)));\n" |
| 26608 | "typedef float float8 __attribute__((ext_vector_type(8)));\n" |
| 26609 | "typedef float float16 __attribute__((ext_vector_type(16)));\n" |
| 26610 | "#ifdef cl_khr_fp16\n" |
| 26611 | "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n" |
| 26612 | "typedef half half2 __attribute__((ext_vector_type(2)));\n" |
| 26613 | "typedef half half3 __attribute__((ext_vector_type(3)));\n" |
| 26614 | "typedef half half4 __attribute__((ext_vector_type(4)));\n" |
| 26615 | "typedef half half8 __attribute__((ext_vector_type(8)));\n" |
| 26616 | "typedef half half16 __attribute__((ext_vector_type(16)));\n" |
| 26617 | "#endif\n" |
| 26618 | "#ifdef cl_khr_fp64\n" |
| 26619 | "#if __OPENCL_C_VERSION__ < CL_VERSION_1_2\n" |
| 26620 | "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" |
| 26621 | "#endif\n" |
| 26622 | "typedef double double2 __attribute__((ext_vector_type(2)));\n" |
| 26623 | "typedef double double3 __attribute__((ext_vector_type(3)));\n" |
| 26624 | "typedef double double4 __attribute__((ext_vector_type(4)));\n" |
| 26625 | "typedef double double8 __attribute__((ext_vector_type(8)));\n" |
| 26626 | "typedef double double16 __attribute__((ext_vector_type(16)));\n" |
| 26627 | "#endif\n" |
| 26628 | "\n" |
| 26629 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 26630 | "#define NULL ((void*)0)\n" |
| 26631 | "#endif\n" |
| 26632 | "\n" |
| 26633 | "/**\n" |
| 26634 | " * Value of maximum non-infinite single-precision floating-point\n" |
| 26635 | " * number.\n" |
| 26636 | " */\n" |
| 26637 | "#define MAXFLOAT 0x1.fffffep127f\n" |
| 26638 | "\n" |
| 26639 | "/**\n" |
| 26640 | " * A positive float constant expression. HUGE_VALF evaluates\n" |
| 26641 | " * to +infinity. Used as an error value returned by the built-in\n" |
| 26642 | " * math functions.\n" |
| 26643 | " */\n" |
| 26644 | "#define HUGE_VALF (__builtin_huge_valf())\n" |
| 26645 | "\n" |
| 26646 | "/**\n" |
| 26647 | " * A positive double constant expression. HUGE_VAL evaluates\n" |
| 26648 | " * to +infinity. Used as an error value returned by the built-in\n" |
| 26649 | " * math functions.\n" |
| 26650 | " */\n" |
| 26651 | "#define HUGE_VAL (__builtin_huge_val())\n" |
| 26652 | "\n" |
| 26653 | "/**\n" |
| 26654 | " * A constant expression of type float representing positive or\n" |
| 26655 | " * unsigned infinity.\n" |
| 26656 | " */\n" |
| 26657 | "#define INFINITY (__builtin_inff())\n" |
| 26658 | "\n" |
| 26659 | "/**\n" |
| 26660 | " * A constant expression of type float representing a quiet NaN.\n" |
| 26661 | " */\n" |
| 26662 | "#define NAN as_float(INT_MAX)\n" |
| 26663 | "\n" |
| 26664 | "#define FP_ILOGB0 INT_MIN\n" |
| 26665 | "#define FP_ILOGBNAN INT_MAX\n" |
| 26666 | "\n" |
| 26667 | "#define FLT_DIG 6\n" |
| 26668 | "#define FLT_MANT_DIG 24\n" |
| 26669 | "#define FLT_MAX_10_EXP +38\n" |
| 26670 | "#define FLT_MAX_EXP +128\n" |
| 26671 | "#define FLT_MIN_10_EXP -37\n" |
| 26672 | "#define FLT_MIN_EXP -125\n" |
| 26673 | "#define FLT_RADIX 2\n" |
| 26674 | "#define FLT_MAX 0x1.fffffep127f\n" |
| 26675 | "#define FLT_MIN 0x1.0p-126f\n" |
| 26676 | "#define FLT_EPSILON 0x1.0p-23f\n" |
| 26677 | "\n" |
| 26678 | "#define M_E_F 2.71828182845904523536028747135266250f\n" |
| 26679 | "#define M_LOG2E_F 1.44269504088896340735992468100189214f\n" |
| 26680 | "#define M_LOG10E_F 0.434294481903251827651128918916605082f\n" |
| 26681 | "#define M_LN2_F 0.693147180559945309417232121458176568f\n" |
| 26682 | "#define M_LN10_F 2.30258509299404568401799145468436421f\n" |
| 26683 | "#define M_PI_F 3.14159265358979323846264338327950288f\n" |
| 26684 | "#define M_PI_2_F 1.57079632679489661923132169163975144f\n" |
| 26685 | "#define M_PI_4_F 0.785398163397448309615660845819875721f\n" |
| 26686 | "#define M_1_PI_F 0.318309886183790671537767526745028724f\n" |
| 26687 | "#define M_2_PI_F 0.636619772367581343075535053490057448f\n" |
| 26688 | "#define M_2_SQRTPI_F 1.12837916709551257389615890312154517f\n" |
| 26689 | "#define M_SQRT2_F 1.41421356237309504880168872420969808f\n" |
| 26690 | "#define M_SQRT1_2_F 0.707106781186547524400844362104849039f\n" |
| 26691 | "\n" |
| 26692 | "#define DBL_DIG 15\n" |
| 26693 | "#define DBL_MANT_DIG 53\n" |
| 26694 | "#define DBL_MAX_10_EXP +308\n" |
| 26695 | "#define DBL_MAX_EXP +1024\n" |
| 26696 | "#define DBL_MIN_10_EXP -307\n" |
| 26697 | "#define DBL_MIN_EXP -1021\n" |
| 26698 | "#define DBL_RADIX 2\n" |
| 26699 | "#define DBL_MAX 0x1.fffffffffffffp1023\n" |
| 26700 | "#define DBL_MIN 0x1.0p-1022\n" |
| 26701 | "#define DBL_EPSILON 0x1.0p-52\n" |
| 26702 | "\n" |
| 26703 | "#define M_E 0x1.5bf0a8b145769p+1\n" |
| 26704 | "#define M_LOG2E 0x1.71547652b82fep+0\n" |
| 26705 | "#define M_LOG10E 0x1.bcb7b1526e50ep-2\n" |
| 26706 | "#define M_LN2 0x1.62e42fefa39efp-1\n" |
| 26707 | "#define M_LN10 0x1.26bb1bbb55516p+1\n" |
| 26708 | "#define M_PI 0x1.921fb54442d18p+1\n" |
| 26709 | "#define M_PI_2 0x1.921fb54442d18p+0\n" |
| 26710 | "#define M_PI_4 0x1.921fb54442d18p-1\n" |
| 26711 | "#define M_1_PI 0x1.45f306dc9c883p-2\n" |
| 26712 | "#define M_2_PI 0x1.45f306dc9c883p-1\n" |
| 26713 | "#define M_2_SQRTPI 0x1.20dd750429b6dp+0\n" |
| 26714 | "#define M_SQRT2 0x1.6a09e667f3bcdp+0\n" |
| 26715 | "#define M_SQRT1_2 0x1.6a09e667f3bcdp-1\n" |
| 26716 | "\n" |
| 26717 | "#ifdef cl_khr_fp16\n" |
| 26718 | "\n" |
| 26719 | "#define HALF_DIG 3\n" |
| 26720 | "#define HALF_MANT_DIG 11\n" |
| 26721 | "#define HALF_MAX_10_EXP +4\n" |
| 26722 | "#define HALF_MAX_EXP +16\n" |
| 26723 | "#define HALF_MIN_10_EXP -4\n" |
| 26724 | "#define HALF_MIN_EXP -13\n" |
| 26725 | "#define HALF_RADIX 2\n" |
| 26726 | "#define HALF_MAX ((0x1.ffcp15h))\n" |
| 26727 | "#define HALF_MIN ((0x1.0p-14h))\n" |
| 26728 | "#define HALF_EPSILON ((0x1.0p-10h))\n" |
| 26729 | "\n" |
| 26730 | "#define M_E_H 2.71828182845904523536028747135266250h\n" |
| 26731 | "#define M_LOG2E_H 1.44269504088896340735992468100189214h\n" |
| 26732 | "#define M_LOG10E_H 0.434294481903251827651128918916605082h\n" |
| 26733 | "#define M_LN2_H 0.693147180559945309417232121458176568h\n" |
| 26734 | "#define M_LN10_H 2.30258509299404568401799145468436421h\n" |
| 26735 | "#define M_PI_H 3.14159265358979323846264338327950288h\n" |
| 26736 | "#define M_PI_2_H 1.57079632679489661923132169163975144h\n" |
| 26737 | "#define M_PI_4_H 0.785398163397448309615660845819875721h\n" |
| 26738 | "#define M_1_PI_H 0.318309886183790671537767526745028724h\n" |
| 26739 | "#define M_2_PI_H 0.636619772367581343075535053490057448h\n" |
| 26740 | "#define M_2_SQRTPI_H 1.12837916709551257389615890312154517h\n" |
| 26741 | "#define M_SQRT2_H 1.41421356237309504880168872420969808h\n" |
| 26742 | "#define M_SQRT1_2_H 0.707106781186547524400844362104849039h\n" |
| 26743 | "\n" |
| 26744 | "#endif //cl_khr_fp16\n" |
| 26745 | "\n" |
| 26746 | "#define CHAR_BIT 8\n" |
| 26747 | "#define SCHAR_MAX 127\n" |
| 26748 | "#define SCHAR_MIN (-128)\n" |
| 26749 | "#define UCHAR_MAX 255\n" |
| 26750 | "#define CHAR_MAX SCHAR_MAX\n" |
| 26751 | "#define CHAR_MIN SCHAR_MIN\n" |
| 26752 | "#define USHRT_MAX 65535\n" |
| 26753 | "#define SHRT_MAX 32767\n" |
| 26754 | "#define SHRT_MIN (-32768)\n" |
| 26755 | "#define UINT_MAX 0xffffffff\n" |
| 26756 | "#define INT_MAX 2147483647\n" |
| 26757 | "#define INT_MIN (-2147483647-1)\n" |
| 26758 | "#define ULONG_MAX 0xffffffffffffffffUL\n" |
| 26759 | "#define LONG_MAX 0x7fffffffffffffffL\n" |
| 26760 | "#define LONG_MIN (-0x7fffffffffffffffL-1)\n" |
| 26761 | "\n" |
| 26762 | "// OpenCL v1.1/1.2/2.0 s6.2.3 - Explicit conversions\n" |
| 26763 | "\n" |
| 26764 | "char __ovld __cnfn convert_char_rte(char);\n" |
| 26765 | "char __ovld __cnfn convert_char_sat_rte(char);\n" |
| 26766 | "char __ovld __cnfn convert_char_rtz(char);\n" |
| 26767 | "char __ovld __cnfn convert_char_sat_rtz(char);\n" |
| 26768 | "char __ovld __cnfn convert_char_rtp(char);\n" |
| 26769 | "char __ovld __cnfn convert_char_sat_rtp(char);\n" |
| 26770 | "char __ovld __cnfn convert_char_rtn(char);\n" |
| 26771 | "char __ovld __cnfn convert_char_sat_rtn(char);\n" |
| 26772 | "char __ovld __cnfn convert_char(char);\n" |
| 26773 | "char __ovld __cnfn convert_char_sat(char);\n" |
| 26774 | "char __ovld __cnfn convert_char_rte(uchar);\n" |
| 26775 | "char __ovld __cnfn convert_char_sat_rte(uchar);\n" |
| 26776 | "char __ovld __cnfn convert_char_rtz(uchar);\n" |
| 26777 | "char __ovld __cnfn convert_char_sat_rtz(uchar);\n" |
| 26778 | "char __ovld __cnfn convert_char_rtp(uchar);\n" |
| 26779 | "char __ovld __cnfn convert_char_sat_rtp(uchar);\n" |
| 26780 | "char __ovld __cnfn convert_char_rtn(uchar);\n" |
| 26781 | "char __ovld __cnfn convert_char_sat_rtn(uchar);\n" |
| 26782 | "char __ovld __cnfn convert_char(uchar);\n" |
| 26783 | "char __ovld __cnfn convert_char_sat(uchar);\n" |
| 26784 | "char __ovld __cnfn convert_char_rte(short);\n" |
| 26785 | "char __ovld __cnfn convert_char_sat_rte(short);\n" |
| 26786 | "char __ovld __cnfn convert_char_rtz(short);\n" |
| 26787 | "char __ovld __cnfn convert_char_sat_rtz(short);\n" |
| 26788 | "char __ovld __cnfn convert_char_rtp(short);\n" |
| 26789 | "char __ovld __cnfn convert_char_sat_rtp(short);\n" |
| 26790 | "char __ovld __cnfn convert_char_rtn(short);\n" |
| 26791 | "char __ovld __cnfn convert_char_sat_rtn(short);\n" |
| 26792 | "char __ovld __cnfn convert_char(short);\n" |
| 26793 | "char __ovld __cnfn convert_char_sat(short);\n" |
| 26794 | "char __ovld __cnfn convert_char_rte(ushort);\n" |
| 26795 | "char __ovld __cnfn convert_char_sat_rte(ushort);\n" |
| 26796 | "char __ovld __cnfn convert_char_rtz(ushort);\n" |
| 26797 | "char __ovld __cnfn convert_char_sat_rtz(ushort);\n" |
| 26798 | "char __ovld __cnfn convert_char_rtp(ushort);\n" |
| 26799 | "char __ovld __cnfn convert_char_sat_rtp(ushort);\n" |
| 26800 | "char __ovld __cnfn convert_char_rtn(ushort);\n" |
| 26801 | "char __ovld __cnfn convert_char_sat_rtn(ushort);\n" |
| 26802 | "char __ovld __cnfn convert_char(ushort);\n" |
| 26803 | "char __ovld __cnfn convert_char_sat(ushort);\n" |
| 26804 | "char __ovld __cnfn convert_char_rte(int);\n" |
| 26805 | "char __ovld __cnfn convert_char_sat_rte(int);\n" |
| 26806 | "char __ovld __cnfn convert_char_rtz(int);\n" |
| 26807 | "char __ovld __cnfn convert_char_sat_rtz(int);\n" |
| 26808 | "char __ovld __cnfn convert_char_rtp(int);\n" |
| 26809 | "char __ovld __cnfn convert_char_sat_rtp(int);\n" |
| 26810 | "char __ovld __cnfn convert_char_rtn(int);\n" |
| 26811 | "char __ovld __cnfn convert_char_sat_rtn(int);\n" |
| 26812 | "char __ovld __cnfn convert_char(int);\n" |
| 26813 | "char __ovld __cnfn convert_char_sat(int);\n" |
| 26814 | "char __ovld __cnfn convert_char_rte(uint);\n" |
| 26815 | "char __ovld __cnfn convert_char_sat_rte(uint);\n" |
| 26816 | "char __ovld __cnfn convert_char_rtz(uint);\n" |
| 26817 | "char __ovld __cnfn convert_char_sat_rtz(uint);\n" |
| 26818 | "char __ovld __cnfn convert_char_rtp(uint);\n" |
| 26819 | "char __ovld __cnfn convert_char_sat_rtp(uint);\n" |
| 26820 | "char __ovld __cnfn convert_char_rtn(uint);\n" |
| 26821 | "char __ovld __cnfn convert_char_sat_rtn(uint);\n" |
| 26822 | "char __ovld __cnfn convert_char(uint);\n" |
| 26823 | "char __ovld __cnfn convert_char_sat(uint);\n" |
| 26824 | "char __ovld __cnfn convert_char_rte(long);\n" |
| 26825 | "char __ovld __cnfn convert_char_sat_rte(long);\n" |
| 26826 | "char __ovld __cnfn convert_char_rtz(long);\n" |
| 26827 | "char __ovld __cnfn convert_char_sat_rtz(long);\n" |
| 26828 | "char __ovld __cnfn convert_char_rtp(long);\n" |
| 26829 | "char __ovld __cnfn convert_char_sat_rtp(long);\n" |
| 26830 | "char __ovld __cnfn convert_char_rtn(long);\n" |
| 26831 | "char __ovld __cnfn convert_char_sat_rtn(long);\n" |
| 26832 | "char __ovld __cnfn convert_char(long);\n" |
| 26833 | "char __ovld __cnfn convert_char_sat(long);\n" |
| 26834 | "char __ovld __cnfn convert_char_rte(ulong);\n" |
| 26835 | "char __ovld __cnfn convert_char_sat_rte(ulong);\n" |
| 26836 | "char __ovld __cnfn convert_char_rtz(ulong);\n" |
| 26837 | "char __ovld __cnfn convert_char_sat_rtz(ulong);\n" |
| 26838 | "char __ovld __cnfn convert_char_rtp(ulong);\n" |
| 26839 | "char __ovld __cnfn convert_char_sat_rtp(ulong);\n" |
| 26840 | "char __ovld __cnfn convert_char_rtn(ulong);\n" |
| 26841 | "char __ovld __cnfn convert_char_sat_rtn(ulong);\n" |
| 26842 | "char __ovld __cnfn convert_char(ulong);\n" |
| 26843 | "char __ovld __cnfn convert_char_sat(ulong);\n" |
| 26844 | "char __ovld __cnfn convert_char_rte(float);\n" |
| 26845 | "char __ovld __cnfn convert_char_sat_rte(float);\n" |
| 26846 | "char __ovld __cnfn convert_char_rtz(float);\n" |
| 26847 | "char __ovld __cnfn convert_char_sat_rtz(float);\n" |
| 26848 | "char __ovld __cnfn convert_char_rtp(float);\n" |
| 26849 | "char __ovld __cnfn convert_char_sat_rtp(float);\n" |
| 26850 | "char __ovld __cnfn convert_char_rtn(float);\n" |
| 26851 | "char __ovld __cnfn convert_char_sat_rtn(float);\n" |
| 26852 | "char __ovld __cnfn convert_char(float);\n" |
| 26853 | "char __ovld __cnfn convert_char_sat(float);\n" |
| 26854 | "uchar __ovld __cnfn convert_uchar_rte(char);\n" |
| 26855 | "uchar __ovld __cnfn convert_uchar_sat_rte(char);\n" |
| 26856 | "uchar __ovld __cnfn convert_uchar_rtz(char);\n" |
| 26857 | "uchar __ovld __cnfn convert_uchar_sat_rtz(char);\n" |
| 26858 | "uchar __ovld __cnfn convert_uchar_rtp(char);\n" |
| 26859 | "uchar __ovld __cnfn convert_uchar_sat_rtp(char);\n" |
| 26860 | "uchar __ovld __cnfn convert_uchar_rtn(char);\n" |
| 26861 | "uchar __ovld __cnfn convert_uchar_sat_rtn(char);\n" |
| 26862 | "uchar __ovld __cnfn convert_uchar(char);\n" |
| 26863 | "uchar __ovld __cnfn convert_uchar_sat(char);\n" |
| 26864 | "uchar __ovld __cnfn convert_uchar_rte(uchar);\n" |
| 26865 | "uchar __ovld __cnfn convert_uchar_sat_rte(uchar);\n" |
| 26866 | "uchar __ovld __cnfn convert_uchar_rtz(uchar);\n" |
| 26867 | "uchar __ovld __cnfn convert_uchar_sat_rtz(uchar);\n" |
| 26868 | "uchar __ovld __cnfn convert_uchar_rtp(uchar);\n" |
| 26869 | "uchar __ovld __cnfn convert_uchar_sat_rtp(uchar);\n" |
| 26870 | "uchar __ovld __cnfn convert_uchar_rtn(uchar);\n" |
| 26871 | "uchar __ovld __cnfn convert_uchar_sat_rtn(uchar);\n" |
| 26872 | "uchar __ovld __cnfn convert_uchar(uchar);\n" |
| 26873 | "uchar __ovld __cnfn convert_uchar_sat(uchar);\n" |
| 26874 | "uchar __ovld __cnfn convert_uchar_rte(short);\n" |
| 26875 | "uchar __ovld __cnfn convert_uchar_sat_rte(short);\n" |
| 26876 | "uchar __ovld __cnfn convert_uchar_rtz(short);\n" |
| 26877 | "uchar __ovld __cnfn convert_uchar_sat_rtz(short);\n" |
| 26878 | "uchar __ovld __cnfn convert_uchar_rtp(short);\n" |
| 26879 | "uchar __ovld __cnfn convert_uchar_sat_rtp(short);\n" |
| 26880 | "uchar __ovld __cnfn convert_uchar_rtn(short);\n" |
| 26881 | "uchar __ovld __cnfn convert_uchar_sat_rtn(short);\n" |
| 26882 | "uchar __ovld __cnfn convert_uchar(short);\n" |
| 26883 | "uchar __ovld __cnfn convert_uchar_sat(short);\n" |
| 26884 | "uchar __ovld __cnfn convert_uchar_rte(ushort);\n" |
| 26885 | "uchar __ovld __cnfn convert_uchar_sat_rte(ushort);\n" |
| 26886 | "uchar __ovld __cnfn convert_uchar_rtz(ushort);\n" |
| 26887 | "uchar __ovld __cnfn convert_uchar_sat_rtz(ushort);\n" |
| 26888 | "uchar __ovld __cnfn convert_uchar_rtp(ushort);\n" |
| 26889 | "uchar __ovld __cnfn convert_uchar_sat_rtp(ushort);\n" |
| 26890 | "uchar __ovld __cnfn convert_uchar_rtn(ushort);\n" |
| 26891 | "uchar __ovld __cnfn convert_uchar_sat_rtn(ushort);\n" |
| 26892 | "uchar __ovld __cnfn convert_uchar(ushort);\n" |
| 26893 | "uchar __ovld __cnfn convert_uchar_sat(ushort);\n" |
| 26894 | "uchar __ovld __cnfn convert_uchar_rte(int);\n" |
| 26895 | "uchar __ovld __cnfn convert_uchar_sat_rte(int);\n" |
| 26896 | "uchar __ovld __cnfn convert_uchar_rtz(int);\n" |
| 26897 | "uchar __ovld __cnfn convert_uchar_sat_rtz(int);\n" |
| 26898 | "uchar __ovld __cnfn convert_uchar_rtp(int);\n" |
| 26899 | "uchar __ovld __cnfn convert_uchar_sat_rtp(int);\n" |
| 26900 | "uchar __ovld __cnfn convert_uchar_rtn(int);\n" |
| 26901 | "uchar __ovld __cnfn convert_uchar_sat_rtn(int);\n" |
| 26902 | "uchar __ovld __cnfn convert_uchar(int);\n" |
| 26903 | "uchar __ovld __cnfn convert_uchar_sat(int);\n" |
| 26904 | "uchar __ovld __cnfn convert_uchar_rte(uint);\n" |
| 26905 | "uchar __ovld __cnfn convert_uchar_sat_rte(uint);\n" |
| 26906 | "uchar __ovld __cnfn convert_uchar_rtz(uint);\n" |
| 26907 | "uchar __ovld __cnfn convert_uchar_sat_rtz(uint);\n" |
| 26908 | "uchar __ovld __cnfn convert_uchar_rtp(uint);\n" |
| 26909 | "uchar __ovld __cnfn convert_uchar_sat_rtp(uint);\n" |
| 26910 | "uchar __ovld __cnfn convert_uchar_rtn(uint);\n" |
| 26911 | "uchar __ovld __cnfn convert_uchar_sat_rtn(uint);\n" |
| 26912 | "uchar __ovld __cnfn convert_uchar(uint);\n" |
| 26913 | "uchar __ovld __cnfn convert_uchar_sat(uint);\n" |
| 26914 | "uchar __ovld __cnfn convert_uchar_rte(long);\n" |
| 26915 | "uchar __ovld __cnfn convert_uchar_sat_rte(long);\n" |
| 26916 | "uchar __ovld __cnfn convert_uchar_rtz(long);\n" |
| 26917 | "uchar __ovld __cnfn convert_uchar_sat_rtz(long);\n" |
| 26918 | "uchar __ovld __cnfn convert_uchar_rtp(long);\n" |
| 26919 | "uchar __ovld __cnfn convert_uchar_sat_rtp(long);\n" |
| 26920 | "uchar __ovld __cnfn convert_uchar_rtn(long);\n" |
| 26921 | "uchar __ovld __cnfn convert_uchar_sat_rtn(long);\n" |
| 26922 | "uchar __ovld __cnfn convert_uchar(long);\n" |
| 26923 | "uchar __ovld __cnfn convert_uchar_sat(long);\n" |
| 26924 | "uchar __ovld __cnfn convert_uchar_rte(ulong);\n" |
| 26925 | "uchar __ovld __cnfn convert_uchar_sat_rte(ulong);\n" |
| 26926 | "uchar __ovld __cnfn convert_uchar_rtz(ulong);\n" |
| 26927 | "uchar __ovld __cnfn convert_uchar_sat_rtz(ulong);\n" |
| 26928 | "uchar __ovld __cnfn convert_uchar_rtp(ulong);\n" |
| 26929 | "uchar __ovld __cnfn convert_uchar_sat_rtp(ulong);\n" |
| 26930 | "uchar __ovld __cnfn convert_uchar_rtn(ulong);\n" |
| 26931 | "uchar __ovld __cnfn convert_uchar_sat_rtn(ulong);\n" |
| 26932 | "uchar __ovld __cnfn convert_uchar(ulong);\n" |
| 26933 | "uchar __ovld __cnfn convert_uchar_sat(ulong);\n" |
| 26934 | "uchar __ovld __cnfn convert_uchar_rte(float);\n" |
| 26935 | "uchar __ovld __cnfn convert_uchar_sat_rte(float);\n" |
| 26936 | "uchar __ovld __cnfn convert_uchar_rtz(float);\n" |
| 26937 | "uchar __ovld __cnfn convert_uchar_sat_rtz(float);\n" |
| 26938 | "uchar __ovld __cnfn convert_uchar_rtp(float);\n" |
| 26939 | "uchar __ovld __cnfn convert_uchar_sat_rtp(float);\n" |
| 26940 | "uchar __ovld __cnfn convert_uchar_rtn(float);\n" |
| 26941 | "uchar __ovld __cnfn convert_uchar_sat_rtn(float);\n" |
| 26942 | "uchar __ovld __cnfn convert_uchar(float);\n" |
| 26943 | "uchar __ovld __cnfn convert_uchar_sat(float);\n" |
| 26944 | "\n" |
| 26945 | "short __ovld __cnfn convert_short_rte(char);\n" |
| 26946 | "short __ovld __cnfn convert_short_sat_rte(char);\n" |
| 26947 | "short __ovld __cnfn convert_short_rtz(char);\n" |
| 26948 | "short __ovld __cnfn convert_short_sat_rtz(char);\n" |
| 26949 | "short __ovld __cnfn convert_short_rtp(char);\n" |
| 26950 | "short __ovld __cnfn convert_short_sat_rtp(char);\n" |
| 26951 | "short __ovld __cnfn convert_short_rtn(char);\n" |
| 26952 | "short __ovld __cnfn convert_short_sat_rtn(char);\n" |
| 26953 | "short __ovld __cnfn convert_short(char);\n" |
| 26954 | "short __ovld __cnfn convert_short_sat(char);\n" |
| 26955 | "short __ovld __cnfn convert_short_rte(uchar);\n" |
| 26956 | "short __ovld __cnfn convert_short_sat_rte(uchar);\n" |
| 26957 | "short __ovld __cnfn convert_short_rtz(uchar);\n" |
| 26958 | "short __ovld __cnfn convert_short_sat_rtz(uchar);\n" |
| 26959 | "short __ovld __cnfn convert_short_rtp(uchar);\n" |
| 26960 | "short __ovld __cnfn convert_short_sat_rtp(uchar);\n" |
| 26961 | "short __ovld __cnfn convert_short_rtn(uchar);\n" |
| 26962 | "short __ovld __cnfn convert_short_sat_rtn(uchar);\n" |
| 26963 | "short __ovld __cnfn convert_short(uchar);\n" |
| 26964 | "short __ovld __cnfn convert_short_sat(uchar);\n" |
| 26965 | "short __ovld __cnfn convert_short_rte(short);\n" |
| 26966 | "short __ovld __cnfn convert_short_sat_rte(short);\n" |
| 26967 | "short __ovld __cnfn convert_short_rtz(short);\n" |
| 26968 | "short __ovld __cnfn convert_short_sat_rtz(short);\n" |
| 26969 | "short __ovld __cnfn convert_short_rtp(short);\n" |
| 26970 | "short __ovld __cnfn convert_short_sat_rtp(short);\n" |
| 26971 | "short __ovld __cnfn convert_short_rtn(short);\n" |
| 26972 | "short __ovld __cnfn convert_short_sat_rtn(short);\n" |
| 26973 | "short __ovld __cnfn convert_short(short);\n" |
| 26974 | "short __ovld __cnfn convert_short_sat(short);\n" |
| 26975 | "short __ovld __cnfn convert_short_rte(ushort);\n" |
| 26976 | "short __ovld __cnfn convert_short_sat_rte(ushort);\n" |
| 26977 | "short __ovld __cnfn convert_short_rtz(ushort);\n" |
| 26978 | "short __ovld __cnfn convert_short_sat_rtz(ushort);\n" |
| 26979 | "short __ovld __cnfn convert_short_rtp(ushort);\n" |
| 26980 | "short __ovld __cnfn convert_short_sat_rtp(ushort);\n" |
| 26981 | "short __ovld __cnfn convert_short_rtn(ushort);\n" |
| 26982 | "short __ovld __cnfn convert_short_sat_rtn(ushort);\n" |
| 26983 | "short __ovld __cnfn convert_short(ushort);\n" |
| 26984 | "short __ovld __cnfn convert_short_sat(ushort);\n" |
| 26985 | "short __ovld __cnfn convert_short_rte(int);\n" |
| 26986 | "short __ovld __cnfn convert_short_sat_rte(int);\n" |
| 26987 | "short __ovld __cnfn convert_short_rtz(int);\n" |
| 26988 | "short __ovld __cnfn convert_short_sat_rtz(int);\n" |
| 26989 | "short __ovld __cnfn convert_short_rtp(int);\n" |
| 26990 | "short __ovld __cnfn convert_short_sat_rtp(int);\n" |
| 26991 | "short __ovld __cnfn convert_short_rtn(int);\n" |
| 26992 | "short __ovld __cnfn convert_short_sat_rtn(int);\n" |
| 26993 | "short __ovld __cnfn convert_short(int);\n" |
| 26994 | "short __ovld __cnfn convert_short_sat(int);\n" |
| 26995 | "short __ovld __cnfn convert_short_rte(uint);\n" |
| 26996 | "short __ovld __cnfn convert_short_sat_rte(uint);\n" |
| 26997 | "short __ovld __cnfn convert_short_rtz(uint);\n" |
| 26998 | "short __ovld __cnfn convert_short_sat_rtz(uint);\n" |
| 26999 | "short __ovld __cnfn convert_short_rtp(uint);\n" |
| 27000 | "short __ovld __cnfn convert_short_sat_rtp(uint);\n" |
| 27001 | "short __ovld __cnfn convert_short_rtn(uint);\n" |
| 27002 | "short __ovld __cnfn convert_short_sat_rtn(uint);\n" |
| 27003 | "short __ovld __cnfn convert_short(uint);\n" |
| 27004 | "short __ovld __cnfn convert_short_sat(uint);\n" |
| 27005 | "short __ovld __cnfn convert_short_rte(long);\n" |
| 27006 | "short __ovld __cnfn convert_short_sat_rte(long);\n" |
| 27007 | "short __ovld __cnfn convert_short_rtz(long);\n" |
| 27008 | "short __ovld __cnfn convert_short_sat_rtz(long);\n" |
| 27009 | "short __ovld __cnfn convert_short_rtp(long);\n" |
| 27010 | "short __ovld __cnfn convert_short_sat_rtp(long);\n" |
| 27011 | "short __ovld __cnfn convert_short_rtn(long);\n" |
| 27012 | "short __ovld __cnfn convert_short_sat_rtn(long);\n" |
| 27013 | "short __ovld __cnfn convert_short(long);\n" |
| 27014 | "short __ovld __cnfn convert_short_sat(long);\n" |
| 27015 | "short __ovld __cnfn convert_short_rte(ulong);\n" |
| 27016 | "short __ovld __cnfn convert_short_sat_rte(ulong);\n" |
| 27017 | "short __ovld __cnfn convert_short_rtz(ulong);\n" |
| 27018 | "short __ovld __cnfn convert_short_sat_rtz(ulong);\n" |
| 27019 | "short __ovld __cnfn convert_short_rtp(ulong);\n" |
| 27020 | "short __ovld __cnfn convert_short_sat_rtp(ulong);\n" |
| 27021 | "short __ovld __cnfn convert_short_rtn(ulong);\n" |
| 27022 | "short __ovld __cnfn convert_short_sat_rtn(ulong);\n" |
| 27023 | "short __ovld __cnfn convert_short(ulong);\n" |
| 27024 | "short __ovld __cnfn convert_short_sat(ulong);\n" |
| 27025 | "short __ovld __cnfn convert_short_rte(float);\n" |
| 27026 | "short __ovld __cnfn convert_short_sat_rte(float);\n" |
| 27027 | "short __ovld __cnfn convert_short_rtz(float);\n" |
| 27028 | "short __ovld __cnfn convert_short_sat_rtz(float);\n" |
| 27029 | "short __ovld __cnfn convert_short_rtp(float);\n" |
| 27030 | "short __ovld __cnfn convert_short_sat_rtp(float);\n" |
| 27031 | "short __ovld __cnfn convert_short_rtn(float);\n" |
| 27032 | "short __ovld __cnfn convert_short_sat_rtn(float);\n" |
| 27033 | "short __ovld __cnfn convert_short(float);\n" |
| 27034 | "short __ovld __cnfn convert_short_sat(float);\n" |
| 27035 | "ushort __ovld __cnfn convert_ushort_rte(char);\n" |
| 27036 | "ushort __ovld __cnfn convert_ushort_sat_rte(char);\n" |
| 27037 | "ushort __ovld __cnfn convert_ushort_rtz(char);\n" |
| 27038 | "ushort __ovld __cnfn convert_ushort_sat_rtz(char);\n" |
| 27039 | "ushort __ovld __cnfn convert_ushort_rtp(char);\n" |
| 27040 | "ushort __ovld __cnfn convert_ushort_sat_rtp(char);\n" |
| 27041 | "ushort __ovld __cnfn convert_ushort_rtn(char);\n" |
| 27042 | "ushort __ovld __cnfn convert_ushort_sat_rtn(char);\n" |
| 27043 | "ushort __ovld __cnfn convert_ushort(char);\n" |
| 27044 | "ushort __ovld __cnfn convert_ushort_sat(char);\n" |
| 27045 | "ushort __ovld __cnfn convert_ushort_rte(uchar);\n" |
| 27046 | "ushort __ovld __cnfn convert_ushort_sat_rte(uchar);\n" |
| 27047 | "ushort __ovld __cnfn convert_ushort_rtz(uchar);\n" |
| 27048 | "ushort __ovld __cnfn convert_ushort_sat_rtz(uchar);\n" |
| 27049 | "ushort __ovld __cnfn convert_ushort_rtp(uchar);\n" |
| 27050 | "ushort __ovld __cnfn convert_ushort_sat_rtp(uchar);\n" |
| 27051 | "ushort __ovld __cnfn convert_ushort_rtn(uchar);\n" |
| 27052 | "ushort __ovld __cnfn convert_ushort_sat_rtn(uchar);\n" |
| 27053 | "ushort __ovld __cnfn convert_ushort(uchar);\n" |
| 27054 | "ushort __ovld __cnfn convert_ushort_sat(uchar);\n" |
| 27055 | "ushort __ovld __cnfn convert_ushort_rte(short);\n" |
| 27056 | "ushort __ovld __cnfn convert_ushort_sat_rte(short);\n" |
| 27057 | "ushort __ovld __cnfn convert_ushort_rtz(short);\n" |
| 27058 | "ushort __ovld __cnfn convert_ushort_sat_rtz(short);\n" |
| 27059 | "ushort __ovld __cnfn convert_ushort_rtp(short);\n" |
| 27060 | "ushort __ovld __cnfn convert_ushort_sat_rtp(short);\n" |
| 27061 | "ushort __ovld __cnfn convert_ushort_rtn(short);\n" |
| 27062 | "ushort __ovld __cnfn convert_ushort_sat_rtn(short);\n" |
| 27063 | "ushort __ovld __cnfn convert_ushort(short);\n" |
| 27064 | "ushort __ovld __cnfn convert_ushort_sat(short);\n" |
| 27065 | "ushort __ovld __cnfn convert_ushort_rte(ushort);\n" |
| 27066 | "ushort __ovld __cnfn convert_ushort_sat_rte(ushort);\n" |
| 27067 | "ushort __ovld __cnfn convert_ushort_rtz(ushort);\n" |
| 27068 | "ushort __ovld __cnfn convert_ushort_sat_rtz(ushort);\n" |
| 27069 | "ushort __ovld __cnfn convert_ushort_rtp(ushort);\n" |
| 27070 | "ushort __ovld __cnfn convert_ushort_sat_rtp(ushort);\n" |
| 27071 | "ushort __ovld __cnfn convert_ushort_rtn(ushort);\n" |
| 27072 | "ushort __ovld __cnfn convert_ushort_sat_rtn(ushort);\n" |
| 27073 | "ushort __ovld __cnfn convert_ushort(ushort);\n" |
| 27074 | "ushort __ovld __cnfn convert_ushort_sat(ushort);\n" |
| 27075 | "ushort __ovld __cnfn convert_ushort_rte(int);\n" |
| 27076 | "ushort __ovld __cnfn convert_ushort_sat_rte(int);\n" |
| 27077 | "ushort __ovld __cnfn convert_ushort_rtz(int);\n" |
| 27078 | "ushort __ovld __cnfn convert_ushort_sat_rtz(int);\n" |
| 27079 | "ushort __ovld __cnfn convert_ushort_rtp(int);\n" |
| 27080 | "ushort __ovld __cnfn convert_ushort_sat_rtp(int);\n" |
| 27081 | "ushort __ovld __cnfn convert_ushort_rtn(int);\n" |
| 27082 | "ushort __ovld __cnfn convert_ushort_sat_rtn(int);\n" |
| 27083 | "ushort __ovld __cnfn convert_ushort(int);\n" |
| 27084 | "ushort __ovld __cnfn convert_ushort_sat(int);\n" |
| 27085 | "ushort __ovld __cnfn convert_ushort_rte(uint);\n" |
| 27086 | "ushort __ovld __cnfn convert_ushort_sat_rte(uint);\n" |
| 27087 | "ushort __ovld __cnfn convert_ushort_rtz(uint);\n" |
| 27088 | "ushort __ovld __cnfn convert_ushort_sat_rtz(uint);\n" |
| 27089 | "ushort __ovld __cnfn convert_ushort_rtp(uint);\n" |
| 27090 | "ushort __ovld __cnfn convert_ushort_sat_rtp(uint);\n" |
| 27091 | "ushort __ovld __cnfn convert_ushort_rtn(uint);\n" |
| 27092 | "ushort __ovld __cnfn convert_ushort_sat_rtn(uint);\n" |
| 27093 | "ushort __ovld __cnfn convert_ushort(uint);\n" |
| 27094 | "ushort __ovld __cnfn convert_ushort_sat(uint);\n" |
| 27095 | "ushort __ovld __cnfn convert_ushort_rte(long);\n" |
| 27096 | "ushort __ovld __cnfn convert_ushort_sat_rte(long);\n" |
| 27097 | "ushort __ovld __cnfn convert_ushort_rtz(long);\n" |
| 27098 | "ushort __ovld __cnfn convert_ushort_sat_rtz(long);\n" |
| 27099 | "ushort __ovld __cnfn convert_ushort_rtp(long);\n" |
| 27100 | "ushort __ovld __cnfn convert_ushort_sat_rtp(long);\n" |
| 27101 | "ushort __ovld __cnfn convert_ushort_rtn(long);\n" |
| 27102 | "ushort __ovld __cnfn convert_ushort_sat_rtn(long);\n" |
| 27103 | "ushort __ovld __cnfn convert_ushort(long);\n" |
| 27104 | "ushort __ovld __cnfn convert_ushort_sat(long);\n" |
| 27105 | "ushort __ovld __cnfn convert_ushort_rte(ulong);\n" |
| 27106 | "ushort __ovld __cnfn convert_ushort_sat_rte(ulong);\n" |
| 27107 | "ushort __ovld __cnfn convert_ushort_rtz(ulong);\n" |
| 27108 | "ushort __ovld __cnfn convert_ushort_sat_rtz(ulong);\n" |
| 27109 | "ushort __ovld __cnfn convert_ushort_rtp(ulong);\n" |
| 27110 | "ushort __ovld __cnfn convert_ushort_sat_rtp(ulong);\n" |
| 27111 | "ushort __ovld __cnfn convert_ushort_rtn(ulong);\n" |
| 27112 | "ushort __ovld __cnfn convert_ushort_sat_rtn(ulong);\n" |
| 27113 | "ushort __ovld __cnfn convert_ushort(ulong);\n" |
| 27114 | "ushort __ovld __cnfn convert_ushort_sat(ulong);\n" |
| 27115 | "ushort __ovld __cnfn convert_ushort_rte(float);\n" |
| 27116 | "ushort __ovld __cnfn convert_ushort_sat_rte(float);\n" |
| 27117 | "ushort __ovld __cnfn convert_ushort_rtz(float);\n" |
| 27118 | "ushort __ovld __cnfn convert_ushort_sat_rtz(float);\n" |
| 27119 | "ushort __ovld __cnfn convert_ushort_rtp(float);\n" |
| 27120 | "ushort __ovld __cnfn convert_ushort_sat_rtp(float);\n" |
| 27121 | "ushort __ovld __cnfn convert_ushort_rtn(float);\n" |
| 27122 | "ushort __ovld __cnfn convert_ushort_sat_rtn(float);\n" |
| 27123 | "ushort __ovld __cnfn convert_ushort(float);\n" |
| 27124 | "ushort __ovld __cnfn convert_ushort_sat(float);\n" |
| 27125 | "int __ovld __cnfn convert_int_rte(char);\n" |
| 27126 | "int __ovld __cnfn convert_int_sat_rte(char);\n" |
| 27127 | "int __ovld __cnfn convert_int_rtz(char);\n" |
| 27128 | "int __ovld __cnfn convert_int_sat_rtz(char);\n" |
| 27129 | "int __ovld __cnfn convert_int_rtp(char);\n" |
| 27130 | "int __ovld __cnfn convert_int_sat_rtp(char);\n" |
| 27131 | "int __ovld __cnfn convert_int_rtn(char);\n" |
| 27132 | "int __ovld __cnfn convert_int_sat_rtn(char);\n" |
| 27133 | "int __ovld __cnfn convert_int(char);\n" |
| 27134 | "int __ovld __cnfn convert_int_sat(char);\n" |
| 27135 | "int __ovld __cnfn convert_int_rte(uchar);\n" |
| 27136 | "int __ovld __cnfn convert_int_sat_rte(uchar);\n" |
| 27137 | "int __ovld __cnfn convert_int_rtz(uchar);\n" |
| 27138 | "int __ovld __cnfn convert_int_sat_rtz(uchar);\n" |
| 27139 | "int __ovld __cnfn convert_int_rtp(uchar);\n" |
| 27140 | "int __ovld __cnfn convert_int_sat_rtp(uchar);\n" |
| 27141 | "int __ovld __cnfn convert_int_rtn(uchar);\n" |
| 27142 | "int __ovld __cnfn convert_int_sat_rtn(uchar);\n" |
| 27143 | "int __ovld __cnfn convert_int(uchar);\n" |
| 27144 | "int __ovld __cnfn convert_int_sat(uchar);\n" |
| 27145 | "int __ovld __cnfn convert_int_rte(short);\n" |
| 27146 | "int __ovld __cnfn convert_int_sat_rte(short);\n" |
| 27147 | "int __ovld __cnfn convert_int_rtz(short);\n" |
| 27148 | "int __ovld __cnfn convert_int_sat_rtz(short);\n" |
| 27149 | "int __ovld __cnfn convert_int_rtp(short);\n" |
| 27150 | "int __ovld __cnfn convert_int_sat_rtp(short);\n" |
| 27151 | "int __ovld __cnfn convert_int_rtn(short);\n" |
| 27152 | "int __ovld __cnfn convert_int_sat_rtn(short);\n" |
| 27153 | "int __ovld __cnfn convert_int(short);\n" |
| 27154 | "int __ovld __cnfn convert_int_sat(short);\n" |
| 27155 | "int __ovld __cnfn convert_int_rte(ushort);\n" |
| 27156 | "int __ovld __cnfn convert_int_sat_rte(ushort);\n" |
| 27157 | "int __ovld __cnfn convert_int_rtz(ushort);\n" |
| 27158 | "int __ovld __cnfn convert_int_sat_rtz(ushort);\n" |
| 27159 | "int __ovld __cnfn convert_int_rtp(ushort);\n" |
| 27160 | "int __ovld __cnfn convert_int_sat_rtp(ushort);\n" |
| 27161 | "int __ovld __cnfn convert_int_rtn(ushort);\n" |
| 27162 | "int __ovld __cnfn convert_int_sat_rtn(ushort);\n" |
| 27163 | "int __ovld __cnfn convert_int(ushort);\n" |
| 27164 | "int __ovld __cnfn convert_int_sat(ushort);\n" |
| 27165 | "int __ovld __cnfn convert_int_rte(int);\n" |
| 27166 | "int __ovld __cnfn convert_int_sat_rte(int);\n" |
| 27167 | "int __ovld __cnfn convert_int_rtz(int);\n" |
| 27168 | "int __ovld __cnfn convert_int_sat_rtz(int);\n" |
| 27169 | "int __ovld __cnfn convert_int_rtp(int);\n" |
| 27170 | "int __ovld __cnfn convert_int_sat_rtp(int);\n" |
| 27171 | "int __ovld __cnfn convert_int_rtn(int);\n" |
| 27172 | "int __ovld __cnfn convert_int_sat_rtn(int);\n" |
| 27173 | "int __ovld __cnfn convert_int(int);\n" |
| 27174 | "int __ovld __cnfn convert_int_sat(int);\n" |
| 27175 | "int __ovld __cnfn convert_int_rte(uint);\n" |
| 27176 | "int __ovld __cnfn convert_int_sat_rte(uint);\n" |
| 27177 | "int __ovld __cnfn convert_int_rtz(uint);\n" |
| 27178 | "int __ovld __cnfn convert_int_sat_rtz(uint);\n" |
| 27179 | "int __ovld __cnfn convert_int_rtp(uint);\n" |
| 27180 | "int __ovld __cnfn convert_int_sat_rtp(uint);\n" |
| 27181 | "int __ovld __cnfn convert_int_rtn(uint);\n" |
| 27182 | "int __ovld __cnfn convert_int_sat_rtn(uint);\n" |
| 27183 | "int __ovld __cnfn convert_int(uint);\n" |
| 27184 | "int __ovld __cnfn convert_int_sat(uint);\n" |
| 27185 | "int __ovld __cnfn convert_int_rte(long);\n" |
| 27186 | "int __ovld __cnfn convert_int_sat_rte(long);\n" |
| 27187 | "int __ovld __cnfn convert_int_rtz(long);\n" |
| 27188 | "int __ovld __cnfn convert_int_sat_rtz(long);\n" |
| 27189 | "int __ovld __cnfn convert_int_rtp(long);\n" |
| 27190 | "int __ovld __cnfn convert_int_sat_rtp(long);\n" |
| 27191 | "int __ovld __cnfn convert_int_rtn(long);\n" |
| 27192 | "int __ovld __cnfn convert_int_sat_rtn(long);\n" |
| 27193 | "int __ovld __cnfn convert_int(long);\n" |
| 27194 | "int __ovld __cnfn convert_int_sat(long);\n" |
| 27195 | "int __ovld __cnfn convert_int_rte(ulong);\n" |
| 27196 | "int __ovld __cnfn convert_int_sat_rte(ulong);\n" |
| 27197 | "int __ovld __cnfn convert_int_rtz(ulong);\n" |
| 27198 | "int __ovld __cnfn convert_int_sat_rtz(ulong);\n" |
| 27199 | "int __ovld __cnfn convert_int_rtp(ulong);\n" |
| 27200 | "int __ovld __cnfn convert_int_sat_rtp(ulong);\n" |
| 27201 | "int __ovld __cnfn convert_int_rtn(ulong);\n" |
| 27202 | "int __ovld __cnfn convert_int_sat_rtn(ulong);\n" |
| 27203 | "int __ovld __cnfn convert_int(ulong);\n" |
| 27204 | "int __ovld __cnfn convert_int_sat(ulong);\n" |
| 27205 | "int __ovld __cnfn convert_int_rte(float);\n" |
| 27206 | "int __ovld __cnfn convert_int_sat_rte(float);\n" |
| 27207 | "int __ovld __cnfn convert_int_rtz(float);\n" |
| 27208 | "int __ovld __cnfn convert_int_sat_rtz(float);\n" |
| 27209 | "int __ovld __cnfn convert_int_rtp(float);\n" |
| 27210 | "int __ovld __cnfn convert_int_sat_rtp(float);\n" |
| 27211 | "int __ovld __cnfn convert_int_rtn(float);\n" |
| 27212 | "int __ovld __cnfn convert_int_sat_rtn(float);\n" |
| 27213 | "int __ovld __cnfn convert_int(float);\n" |
| 27214 | "int __ovld __cnfn convert_int_sat(float);\n" |
| 27215 | "uint __ovld __cnfn convert_uint_rte(char);\n" |
| 27216 | "uint __ovld __cnfn convert_uint_sat_rte(char);\n" |
| 27217 | "uint __ovld __cnfn convert_uint_rtz(char);\n" |
| 27218 | "uint __ovld __cnfn convert_uint_sat_rtz(char);\n" |
| 27219 | "uint __ovld __cnfn convert_uint_rtp(char);\n" |
| 27220 | "uint __ovld __cnfn convert_uint_sat_rtp(char);\n" |
| 27221 | "uint __ovld __cnfn convert_uint_rtn(char);\n" |
| 27222 | "uint __ovld __cnfn convert_uint_sat_rtn(char);\n" |
| 27223 | "uint __ovld __cnfn convert_uint(char);\n" |
| 27224 | "uint __ovld __cnfn convert_uint_sat(char);\n" |
| 27225 | "uint __ovld __cnfn convert_uint_rte(uchar);\n" |
| 27226 | "uint __ovld __cnfn convert_uint_sat_rte(uchar);\n" |
| 27227 | "uint __ovld __cnfn convert_uint_rtz(uchar);\n" |
| 27228 | "uint __ovld __cnfn convert_uint_sat_rtz(uchar);\n" |
| 27229 | "uint __ovld __cnfn convert_uint_rtp(uchar);\n" |
| 27230 | "uint __ovld __cnfn convert_uint_sat_rtp(uchar);\n" |
| 27231 | "uint __ovld __cnfn convert_uint_rtn(uchar);\n" |
| 27232 | "uint __ovld __cnfn convert_uint_sat_rtn(uchar);\n" |
| 27233 | "uint __ovld __cnfn convert_uint(uchar);\n" |
| 27234 | "uint __ovld __cnfn convert_uint_sat(uchar);\n" |
| 27235 | "uint __ovld __cnfn convert_uint_rte(short);\n" |
| 27236 | "uint __ovld __cnfn convert_uint_sat_rte(short);\n" |
| 27237 | "uint __ovld __cnfn convert_uint_rtz(short);\n" |
| 27238 | "uint __ovld __cnfn convert_uint_sat_rtz(short);\n" |
| 27239 | "uint __ovld __cnfn convert_uint_rtp(short);\n" |
| 27240 | "uint __ovld __cnfn convert_uint_sat_rtp(short);\n" |
| 27241 | "uint __ovld __cnfn convert_uint_rtn(short);\n" |
| 27242 | "uint __ovld __cnfn convert_uint_sat_rtn(short);\n" |
| 27243 | "uint __ovld __cnfn convert_uint(short);\n" |
| 27244 | "uint __ovld __cnfn convert_uint_sat(short);\n" |
| 27245 | "uint __ovld __cnfn convert_uint_rte(ushort);\n" |
| 27246 | "uint __ovld __cnfn convert_uint_sat_rte(ushort);\n" |
| 27247 | "uint __ovld __cnfn convert_uint_rtz(ushort);\n" |
| 27248 | "uint __ovld __cnfn convert_uint_sat_rtz(ushort);\n" |
| 27249 | "uint __ovld __cnfn convert_uint_rtp(ushort);\n" |
| 27250 | "uint __ovld __cnfn convert_uint_sat_rtp(ushort);\n" |
| 27251 | "uint __ovld __cnfn convert_uint_rtn(ushort);\n" |
| 27252 | "uint __ovld __cnfn convert_uint_sat_rtn(ushort);\n" |
| 27253 | "uint __ovld __cnfn convert_uint(ushort);\n" |
| 27254 | "uint __ovld __cnfn convert_uint_sat(ushort);\n" |
| 27255 | "uint __ovld __cnfn convert_uint_rte(int);\n" |
| 27256 | "uint __ovld __cnfn convert_uint_sat_rte(int);\n" |
| 27257 | "uint __ovld __cnfn convert_uint_rtz(int);\n" |
| 27258 | "uint __ovld __cnfn convert_uint_sat_rtz(int);\n" |
| 27259 | "uint __ovld __cnfn convert_uint_rtp(int);\n" |
| 27260 | "uint __ovld __cnfn convert_uint_sat_rtp(int);\n" |
| 27261 | "uint __ovld __cnfn convert_uint_rtn(int);\n" |
| 27262 | "uint __ovld __cnfn convert_uint_sat_rtn(int);\n" |
| 27263 | "uint __ovld __cnfn convert_uint(int);\n" |
| 27264 | "uint __ovld __cnfn convert_uint_sat(int);\n" |
| 27265 | "uint __ovld __cnfn convert_uint_rte(uint);\n" |
| 27266 | "uint __ovld __cnfn convert_uint_sat_rte(uint);\n" |
| 27267 | "uint __ovld __cnfn convert_uint_rtz(uint);\n" |
| 27268 | "uint __ovld __cnfn convert_uint_sat_rtz(uint);\n" |
| 27269 | "uint __ovld __cnfn convert_uint_rtp(uint);\n" |
| 27270 | "uint __ovld __cnfn convert_uint_sat_rtp(uint);\n" |
| 27271 | "uint __ovld __cnfn convert_uint_rtn(uint);\n" |
| 27272 | "uint __ovld __cnfn convert_uint_sat_rtn(uint);\n" |
| 27273 | "uint __ovld __cnfn convert_uint(uint);\n" |
| 27274 | "uint __ovld __cnfn convert_uint_sat(uint);\n" |
| 27275 | "uint __ovld __cnfn convert_uint_rte(long);\n" |
| 27276 | "uint __ovld __cnfn convert_uint_sat_rte(long);\n" |
| 27277 | "uint __ovld __cnfn convert_uint_rtz(long);\n" |
| 27278 | "uint __ovld __cnfn convert_uint_sat_rtz(long);\n" |
| 27279 | "uint __ovld __cnfn convert_uint_rtp(long);\n" |
| 27280 | "uint __ovld __cnfn convert_uint_sat_rtp(long);\n" |
| 27281 | "uint __ovld __cnfn convert_uint_rtn(long);\n" |
| 27282 | "uint __ovld __cnfn convert_uint_sat_rtn(long);\n" |
| 27283 | "uint __ovld __cnfn convert_uint(long);\n" |
| 27284 | "uint __ovld __cnfn convert_uint_sat(long);\n" |
| 27285 | "uint __ovld __cnfn convert_uint_rte(ulong);\n" |
| 27286 | "uint __ovld __cnfn convert_uint_sat_rte(ulong);\n" |
| 27287 | "uint __ovld __cnfn convert_uint_rtz(ulong);\n" |
| 27288 | "uint __ovld __cnfn convert_uint_sat_rtz(ulong);\n" |
| 27289 | "uint __ovld __cnfn convert_uint_rtp(ulong);\n" |
| 27290 | "uint __ovld __cnfn convert_uint_sat_rtp(ulong);\n" |
| 27291 | "uint __ovld __cnfn convert_uint_rtn(ulong);\n" |
| 27292 | "uint __ovld __cnfn convert_uint_sat_rtn(ulong);\n" |
| 27293 | "uint __ovld __cnfn convert_uint(ulong);\n" |
| 27294 | "uint __ovld __cnfn convert_uint_sat(ulong);\n" |
| 27295 | "uint __ovld __cnfn convert_uint_rte(float);\n" |
| 27296 | "uint __ovld __cnfn convert_uint_sat_rte(float);\n" |
| 27297 | "uint __ovld __cnfn convert_uint_rtz(float);\n" |
| 27298 | "uint __ovld __cnfn convert_uint_sat_rtz(float);\n" |
| 27299 | "uint __ovld __cnfn convert_uint_rtp(float);\n" |
| 27300 | "uint __ovld __cnfn convert_uint_sat_rtp(float);\n" |
| 27301 | "uint __ovld __cnfn convert_uint_rtn(float);\n" |
| 27302 | "uint __ovld __cnfn convert_uint_sat_rtn(float);\n" |
| 27303 | "uint __ovld __cnfn convert_uint(float);\n" |
| 27304 | "uint __ovld __cnfn convert_uint_sat(float);\n" |
| 27305 | "long __ovld __cnfn convert_long_rte(char);\n" |
| 27306 | "long __ovld __cnfn convert_long_sat_rte(char);\n" |
| 27307 | "long __ovld __cnfn convert_long_rtz(char);\n" |
| 27308 | "long __ovld __cnfn convert_long_sat_rtz(char);\n" |
| 27309 | "long __ovld __cnfn convert_long_rtp(char);\n" |
| 27310 | "long __ovld __cnfn convert_long_sat_rtp(char);\n" |
| 27311 | "long __ovld __cnfn convert_long_rtn(char);\n" |
| 27312 | "long __ovld __cnfn convert_long_sat_rtn(char);\n" |
| 27313 | "long __ovld __cnfn convert_long(char);\n" |
| 27314 | "long __ovld __cnfn convert_long_sat(char);\n" |
| 27315 | "long __ovld __cnfn convert_long_rte(uchar);\n" |
| 27316 | "long __ovld __cnfn convert_long_sat_rte(uchar);\n" |
| 27317 | "long __ovld __cnfn convert_long_rtz(uchar);\n" |
| 27318 | "long __ovld __cnfn convert_long_sat_rtz(uchar);\n" |
| 27319 | "long __ovld __cnfn convert_long_rtp(uchar);\n" |
| 27320 | "long __ovld __cnfn convert_long_sat_rtp(uchar);\n" |
| 27321 | "long __ovld __cnfn convert_long_rtn(uchar);\n" |
| 27322 | "long __ovld __cnfn convert_long_sat_rtn(uchar);\n" |
| 27323 | "long __ovld __cnfn convert_long(uchar);\n" |
| 27324 | "long __ovld __cnfn convert_long_sat(uchar);\n" |
| 27325 | "long __ovld __cnfn convert_long_rte(short);\n" |
| 27326 | "long __ovld __cnfn convert_long_sat_rte(short);\n" |
| 27327 | "long __ovld __cnfn convert_long_rtz(short);\n" |
| 27328 | "long __ovld __cnfn convert_long_sat_rtz(short);\n" |
| 27329 | "long __ovld __cnfn convert_long_rtp(short);\n" |
| 27330 | "long __ovld __cnfn convert_long_sat_rtp(short);\n" |
| 27331 | "long __ovld __cnfn convert_long_rtn(short);\n" |
| 27332 | "long __ovld __cnfn convert_long_sat_rtn(short);\n" |
| 27333 | "long __ovld __cnfn convert_long(short);\n" |
| 27334 | "long __ovld __cnfn convert_long_sat(short);\n" |
| 27335 | "long __ovld __cnfn convert_long_rte(ushort);\n" |
| 27336 | "long __ovld __cnfn convert_long_sat_rte(ushort);\n" |
| 27337 | "long __ovld __cnfn convert_long_rtz(ushort);\n" |
| 27338 | "long __ovld __cnfn convert_long_sat_rtz(ushort);\n" |
| 27339 | "long __ovld __cnfn convert_long_rtp(ushort);\n" |
| 27340 | "long __ovld __cnfn convert_long_sat_rtp(ushort);\n" |
| 27341 | "long __ovld __cnfn convert_long_rtn(ushort);\n" |
| 27342 | "long __ovld __cnfn convert_long_sat_rtn(ushort);\n" |
| 27343 | "long __ovld __cnfn convert_long(ushort);\n" |
| 27344 | "long __ovld __cnfn convert_long_sat(ushort);\n" |
| 27345 | "long __ovld __cnfn convert_long_rte(int);\n" |
| 27346 | "long __ovld __cnfn convert_long_sat_rte(int);\n" |
| 27347 | "long __ovld __cnfn convert_long_rtz(int);\n" |
| 27348 | "long __ovld __cnfn convert_long_sat_rtz(int);\n" |
| 27349 | "long __ovld __cnfn convert_long_rtp(int);\n" |
| 27350 | "long __ovld __cnfn convert_long_sat_rtp(int);\n" |
| 27351 | "long __ovld __cnfn convert_long_rtn(int);\n" |
| 27352 | "long __ovld __cnfn convert_long_sat_rtn(int);\n" |
| 27353 | "long __ovld __cnfn convert_long(int);\n" |
| 27354 | "long __ovld __cnfn convert_long_sat(int);\n" |
| 27355 | "long __ovld __cnfn convert_long_rte(uint);\n" |
| 27356 | "long __ovld __cnfn convert_long_sat_rte(uint);\n" |
| 27357 | "long __ovld __cnfn convert_long_rtz(uint);\n" |
| 27358 | "long __ovld __cnfn convert_long_sat_rtz(uint);\n" |
| 27359 | "long __ovld __cnfn convert_long_rtp(uint);\n" |
| 27360 | "long __ovld __cnfn convert_long_sat_rtp(uint);\n" |
| 27361 | "long __ovld __cnfn convert_long_rtn(uint);\n" |
| 27362 | "long __ovld __cnfn convert_long_sat_rtn(uint);\n" |
| 27363 | "long __ovld __cnfn convert_long(uint);\n" |
| 27364 | "long __ovld __cnfn convert_long_sat(uint);\n" |
| 27365 | "long __ovld __cnfn convert_long_rte(long);\n" |
| 27366 | "long __ovld __cnfn convert_long_sat_rte(long);\n" |
| 27367 | "long __ovld __cnfn convert_long_rtz(long);\n" |
| 27368 | "long __ovld __cnfn convert_long_sat_rtz(long);\n" |
| 27369 | "long __ovld __cnfn convert_long_rtp(long);\n" |
| 27370 | "long __ovld __cnfn convert_long_sat_rtp(long);\n" |
| 27371 | "long __ovld __cnfn convert_long_rtn(long);\n" |
| 27372 | "long __ovld __cnfn convert_long_sat_rtn(long);\n" |
| 27373 | "long __ovld __cnfn convert_long(long);\n" |
| 27374 | "long __ovld __cnfn convert_long_sat(long);\n" |
| 27375 | "long __ovld __cnfn convert_long_rte(ulong);\n" |
| 27376 | "long __ovld __cnfn convert_long_sat_rte(ulong);\n" |
| 27377 | "long __ovld __cnfn convert_long_rtz(ulong);\n" |
| 27378 | "long __ovld __cnfn convert_long_sat_rtz(ulong);\n" |
| 27379 | "long __ovld __cnfn convert_long_rtp(ulong);\n" |
| 27380 | "long __ovld __cnfn convert_long_sat_rtp(ulong);\n" |
| 27381 | "long __ovld __cnfn convert_long_rtn(ulong);\n" |
| 27382 | "long __ovld __cnfn convert_long_sat_rtn(ulong);\n" |
| 27383 | "long __ovld __cnfn convert_long(ulong);\n" |
| 27384 | "long __ovld __cnfn convert_long_sat(ulong);\n" |
| 27385 | "long __ovld __cnfn convert_long_rte(float);\n" |
| 27386 | "long __ovld __cnfn convert_long_sat_rte(float);\n" |
| 27387 | "long __ovld __cnfn convert_long_rtz(float);\n" |
| 27388 | "long __ovld __cnfn convert_long_sat_rtz(float);\n" |
| 27389 | "long __ovld __cnfn convert_long_rtp(float);\n" |
| 27390 | "long __ovld __cnfn convert_long_sat_rtp(float);\n" |
| 27391 | "long __ovld __cnfn convert_long_rtn(float);\n" |
| 27392 | "long __ovld __cnfn convert_long_sat_rtn(float);\n" |
| 27393 | "long __ovld __cnfn convert_long(float);\n" |
| 27394 | "long __ovld __cnfn convert_long_sat(float);\n" |
| 27395 | "ulong __ovld __cnfn convert_ulong_rte(char);\n" |
| 27396 | "ulong __ovld __cnfn convert_ulong_sat_rte(char);\n" |
| 27397 | "ulong __ovld __cnfn convert_ulong_rtz(char);\n" |
| 27398 | "ulong __ovld __cnfn convert_ulong_sat_rtz(char);\n" |
| 27399 | "ulong __ovld __cnfn convert_ulong_rtp(char);\n" |
| 27400 | "ulong __ovld __cnfn convert_ulong_sat_rtp(char);\n" |
| 27401 | "ulong __ovld __cnfn convert_ulong_rtn(char);\n" |
| 27402 | "ulong __ovld __cnfn convert_ulong_sat_rtn(char);\n" |
| 27403 | "ulong __ovld __cnfn convert_ulong(char);\n" |
| 27404 | "ulong __ovld __cnfn convert_ulong_sat(char);\n" |
| 27405 | "ulong __ovld __cnfn convert_ulong_rte(uchar);\n" |
| 27406 | "ulong __ovld __cnfn convert_ulong_sat_rte(uchar);\n" |
| 27407 | "ulong __ovld __cnfn convert_ulong_rtz(uchar);\n" |
| 27408 | "ulong __ovld __cnfn convert_ulong_sat_rtz(uchar);\n" |
| 27409 | "ulong __ovld __cnfn convert_ulong_rtp(uchar);\n" |
| 27410 | "ulong __ovld __cnfn convert_ulong_sat_rtp(uchar);\n" |
| 27411 | "ulong __ovld __cnfn convert_ulong_rtn(uchar);\n" |
| 27412 | "ulong __ovld __cnfn convert_ulong_sat_rtn(uchar);\n" |
| 27413 | "ulong __ovld __cnfn convert_ulong(uchar);\n" |
| 27414 | "ulong __ovld __cnfn convert_ulong_sat(uchar);\n" |
| 27415 | "ulong __ovld __cnfn convert_ulong_rte(short);\n" |
| 27416 | "ulong __ovld __cnfn convert_ulong_sat_rte(short);\n" |
| 27417 | "ulong __ovld __cnfn convert_ulong_rtz(short);\n" |
| 27418 | "ulong __ovld __cnfn convert_ulong_sat_rtz(short);\n" |
| 27419 | "ulong __ovld __cnfn convert_ulong_rtp(short);\n" |
| 27420 | "ulong __ovld __cnfn convert_ulong_sat_rtp(short);\n" |
| 27421 | "ulong __ovld __cnfn convert_ulong_rtn(short);\n" |
| 27422 | "ulong __ovld __cnfn convert_ulong_sat_rtn(short);\n" |
| 27423 | "ulong __ovld __cnfn convert_ulong(short);\n" |
| 27424 | "ulong __ovld __cnfn convert_ulong_sat(short);\n" |
| 27425 | "ulong __ovld __cnfn convert_ulong_rte(ushort);\n" |
| 27426 | "ulong __ovld __cnfn convert_ulong_sat_rte(ushort);\n" |
| 27427 | "ulong __ovld __cnfn convert_ulong_rtz(ushort);\n" |
| 27428 | "ulong __ovld __cnfn convert_ulong_sat_rtz(ushort);\n" |
| 27429 | "ulong __ovld __cnfn convert_ulong_rtp(ushort);\n" |
| 27430 | "ulong __ovld __cnfn convert_ulong_sat_rtp(ushort);\n" |
| 27431 | "ulong __ovld __cnfn convert_ulong_rtn(ushort);\n" |
| 27432 | "ulong __ovld __cnfn convert_ulong_sat_rtn(ushort);\n" |
| 27433 | "ulong __ovld __cnfn convert_ulong(ushort);\n" |
| 27434 | "ulong __ovld __cnfn convert_ulong_sat(ushort);\n" |
| 27435 | "ulong __ovld __cnfn convert_ulong_rte(int);\n" |
| 27436 | "ulong __ovld __cnfn convert_ulong_sat_rte(int);\n" |
| 27437 | "ulong __ovld __cnfn convert_ulong_rtz(int);\n" |
| 27438 | "ulong __ovld __cnfn convert_ulong_sat_rtz(int);\n" |
| 27439 | "ulong __ovld __cnfn convert_ulong_rtp(int);\n" |
| 27440 | "ulong __ovld __cnfn convert_ulong_sat_rtp(int);\n" |
| 27441 | "ulong __ovld __cnfn convert_ulong_rtn(int);\n" |
| 27442 | "ulong __ovld __cnfn convert_ulong_sat_rtn(int);\n" |
| 27443 | "ulong __ovld __cnfn convert_ulong(int);\n" |
| 27444 | "ulong __ovld __cnfn convert_ulong_sat(int);\n" |
| 27445 | "ulong __ovld __cnfn convert_ulong_rte(uint);\n" |
| 27446 | "ulong __ovld __cnfn convert_ulong_sat_rte(uint);\n" |
| 27447 | "ulong __ovld __cnfn convert_ulong_rtz(uint);\n" |
| 27448 | "ulong __ovld __cnfn convert_ulong_sat_rtz(uint);\n" |
| 27449 | "ulong __ovld __cnfn convert_ulong_rtp(uint);\n" |
| 27450 | "ulong __ovld __cnfn convert_ulong_sat_rtp(uint);\n" |
| 27451 | "ulong __ovld __cnfn convert_ulong_rtn(uint);\n" |
| 27452 | "ulong __ovld __cnfn convert_ulong_sat_rtn(uint);\n" |
| 27453 | "ulong __ovld __cnfn convert_ulong(uint);\n" |
| 27454 | "ulong __ovld __cnfn convert_ulong_sat(uint);\n" |
| 27455 | "ulong __ovld __cnfn convert_ulong_rte(long);\n" |
| 27456 | "ulong __ovld __cnfn convert_ulong_sat_rte(long);\n" |
| 27457 | "ulong __ovld __cnfn convert_ulong_rtz(long);\n" |
| 27458 | "ulong __ovld __cnfn convert_ulong_sat_rtz(long);\n" |
| 27459 | "ulong __ovld __cnfn convert_ulong_rtp(long);\n" |
| 27460 | "ulong __ovld __cnfn convert_ulong_sat_rtp(long);\n" |
| 27461 | "ulong __ovld __cnfn convert_ulong_rtn(long);\n" |
| 27462 | "ulong __ovld __cnfn convert_ulong_sat_rtn(long);\n" |
| 27463 | "ulong __ovld __cnfn convert_ulong(long);\n" |
| 27464 | "ulong __ovld __cnfn convert_ulong_sat(long);\n" |
| 27465 | "ulong __ovld __cnfn convert_ulong_rte(ulong);\n" |
| 27466 | "ulong __ovld __cnfn convert_ulong_sat_rte(ulong);\n" |
| 27467 | "ulong __ovld __cnfn convert_ulong_rtz(ulong);\n" |
| 27468 | "ulong __ovld __cnfn convert_ulong_sat_rtz(ulong);\n" |
| 27469 | "ulong __ovld __cnfn convert_ulong_rtp(ulong);\n" |
| 27470 | "ulong __ovld __cnfn convert_ulong_sat_rtp(ulong);\n" |
| 27471 | "ulong __ovld __cnfn convert_ulong_rtn(ulong);\n" |
| 27472 | "ulong __ovld __cnfn convert_ulong_sat_rtn(ulong);\n" |
| 27473 | "ulong __ovld __cnfn convert_ulong(ulong);\n" |
| 27474 | "ulong __ovld __cnfn convert_ulong_sat(ulong);\n" |
| 27475 | "ulong __ovld __cnfn convert_ulong_rte(float);\n" |
| 27476 | "ulong __ovld __cnfn convert_ulong_sat_rte(float);\n" |
| 27477 | "ulong __ovld __cnfn convert_ulong_rtz(float);\n" |
| 27478 | "ulong __ovld __cnfn convert_ulong_sat_rtz(float);\n" |
| 27479 | "ulong __ovld __cnfn convert_ulong_rtp(float);\n" |
| 27480 | "ulong __ovld __cnfn convert_ulong_sat_rtp(float);\n" |
| 27481 | "ulong __ovld __cnfn convert_ulong_rtn(float);\n" |
| 27482 | "ulong __ovld __cnfn convert_ulong_sat_rtn(float);\n" |
| 27483 | "ulong __ovld __cnfn convert_ulong(float);\n" |
| 27484 | "ulong __ovld __cnfn convert_ulong_sat(float);\n" |
| 27485 | "float __ovld __cnfn convert_float_rte(char);\n" |
| 27486 | "float __ovld __cnfn convert_float_rtz(char);\n" |
| 27487 | "float __ovld __cnfn convert_float_rtp(char);\n" |
| 27488 | "float __ovld __cnfn convert_float_rtn(char);\n" |
| 27489 | "float __ovld __cnfn convert_float(char);\n" |
| 27490 | "float __ovld __cnfn convert_float_rte(uchar);\n" |
| 27491 | "float __ovld __cnfn convert_float_rtz(uchar);\n" |
| 27492 | "float __ovld __cnfn convert_float_rtp(uchar);\n" |
| 27493 | "float __ovld __cnfn convert_float_rtn(uchar);\n" |
| 27494 | "float __ovld __cnfn convert_float(uchar);\n" |
| 27495 | "float __ovld __cnfn convert_float_rte(short);\n" |
| 27496 | "float __ovld __cnfn convert_float_rtz(short);\n" |
| 27497 | "float __ovld __cnfn convert_float_rtp(short);\n" |
| 27498 | "float __ovld __cnfn convert_float_rtn(short);\n" |
| 27499 | "float __ovld __cnfn convert_float(short);\n" |
| 27500 | "float __ovld __cnfn convert_float_rte(ushort);\n" |
| 27501 | "float __ovld __cnfn convert_float_rtz(ushort);\n" |
| 27502 | "float __ovld __cnfn convert_float_rtp(ushort);\n" |
| 27503 | "float __ovld __cnfn convert_float_rtn(ushort);\n" |
| 27504 | "float __ovld __cnfn convert_float(ushort);\n" |
| 27505 | "float __ovld __cnfn convert_float_rte(int);\n" |
| 27506 | "float __ovld __cnfn convert_float_rtz(int);\n" |
| 27507 | "float __ovld __cnfn convert_float_rtp(int);\n" |
| 27508 | "float __ovld __cnfn convert_float_rtn(int);\n" |
| 27509 | "float __ovld __cnfn convert_float(int);\n" |
| 27510 | "float __ovld __cnfn convert_float_rte(uint);\n" |
| 27511 | "float __ovld __cnfn convert_float_rtz(uint);\n" |
| 27512 | "float __ovld __cnfn convert_float_rtp(uint);\n" |
| 27513 | "float __ovld __cnfn convert_float_rtn(uint);\n" |
| 27514 | "float __ovld __cnfn convert_float(uint);\n" |
| 27515 | "float __ovld __cnfn convert_float_rte(long);\n" |
| 27516 | "float __ovld __cnfn convert_float_rtz(long);\n" |
| 27517 | "float __ovld __cnfn convert_float_rtp(long);\n" |
| 27518 | "float __ovld __cnfn convert_float_rtn(long);\n" |
| 27519 | "float __ovld __cnfn convert_float(long);\n" |
| 27520 | "float __ovld __cnfn convert_float_rte(ulong);\n" |
| 27521 | "float __ovld __cnfn convert_float_rtz(ulong);\n" |
| 27522 | "float __ovld __cnfn convert_float_rtp(ulong);\n" |
| 27523 | "float __ovld __cnfn convert_float_rtn(ulong);\n" |
| 27524 | "float __ovld __cnfn convert_float(ulong);\n" |
| 27525 | "float __ovld __cnfn convert_float_rte(float);\n" |
| 27526 | "float __ovld __cnfn convert_float_rtz(float);\n" |
| 27527 | "float __ovld __cnfn convert_float_rtp(float);\n" |
| 27528 | "float __ovld __cnfn convert_float_rtn(float);\n" |
| 27529 | "float __ovld __cnfn convert_float(float);\n" |
| 27530 | "char2 __ovld __cnfn convert_char2_rte(char2);\n" |
| 27531 | "char2 __ovld __cnfn convert_char2_sat_rte(char2);\n" |
| 27532 | "char2 __ovld __cnfn convert_char2_rtz(char2);\n" |
| 27533 | "char2 __ovld __cnfn convert_char2_sat_rtz(char2);\n" |
| 27534 | "char2 __ovld __cnfn convert_char2_rtp(char2);\n" |
| 27535 | "char2 __ovld __cnfn convert_char2_sat_rtp(char2);\n" |
| 27536 | "char2 __ovld __cnfn convert_char2_rtn(char2);\n" |
| 27537 | "char2 __ovld __cnfn convert_char2_sat_rtn(char2);\n" |
| 27538 | "char2 __ovld __cnfn convert_char2(char2);\n" |
| 27539 | "char2 __ovld __cnfn convert_char2_sat(char2);\n" |
| 27540 | "char2 __ovld __cnfn convert_char2_rte(uchar2);\n" |
| 27541 | "char2 __ovld __cnfn convert_char2_sat_rte(uchar2);\n" |
| 27542 | "char2 __ovld __cnfn convert_char2_rtz(uchar2);\n" |
| 27543 | "char2 __ovld __cnfn convert_char2_sat_rtz(uchar2);\n" |
| 27544 | "char2 __ovld __cnfn convert_char2_rtp(uchar2);\n" |
| 27545 | "char2 __ovld __cnfn convert_char2_sat_rtp(uchar2);\n" |
| 27546 | "char2 __ovld __cnfn convert_char2_rtn(uchar2);\n" |
| 27547 | "char2 __ovld __cnfn convert_char2_sat_rtn(uchar2);\n" |
| 27548 | "char2 __ovld __cnfn convert_char2(uchar2);\n" |
| 27549 | "char2 __ovld __cnfn convert_char2_sat(uchar2);\n" |
| 27550 | "char2 __ovld __cnfn convert_char2_rte(short2);\n" |
| 27551 | "char2 __ovld __cnfn convert_char2_sat_rte(short2);\n" |
| 27552 | "char2 __ovld __cnfn convert_char2_rtz(short2);\n" |
| 27553 | "char2 __ovld __cnfn convert_char2_sat_rtz(short2);\n" |
| 27554 | "char2 __ovld __cnfn convert_char2_rtp(short2);\n" |
| 27555 | "char2 __ovld __cnfn convert_char2_sat_rtp(short2);\n" |
| 27556 | "char2 __ovld __cnfn convert_char2_rtn(short2);\n" |
| 27557 | "char2 __ovld __cnfn convert_char2_sat_rtn(short2);\n" |
| 27558 | "char2 __ovld __cnfn convert_char2(short2);\n" |
| 27559 | "char2 __ovld __cnfn convert_char2_sat(short2);\n" |
| 27560 | "char2 __ovld __cnfn convert_char2_rte(ushort2);\n" |
| 27561 | "char2 __ovld __cnfn convert_char2_sat_rte(ushort2);\n" |
| 27562 | "char2 __ovld __cnfn convert_char2_rtz(ushort2);\n" |
| 27563 | "char2 __ovld __cnfn convert_char2_sat_rtz(ushort2);\n" |
| 27564 | "char2 __ovld __cnfn convert_char2_rtp(ushort2);\n" |
| 27565 | "char2 __ovld __cnfn convert_char2_sat_rtp(ushort2);\n" |
| 27566 | "char2 __ovld __cnfn convert_char2_rtn(ushort2);\n" |
| 27567 | "char2 __ovld __cnfn convert_char2_sat_rtn(ushort2);\n" |
| 27568 | "char2 __ovld __cnfn convert_char2(ushort2);\n" |
| 27569 | "char2 __ovld __cnfn convert_char2_sat(ushort2);\n" |
| 27570 | "char2 __ovld __cnfn convert_char2_rte(int2);\n" |
| 27571 | "char2 __ovld __cnfn convert_char2_sat_rte(int2);\n" |
| 27572 | "char2 __ovld __cnfn convert_char2_rtz(int2);\n" |
| 27573 | "char2 __ovld __cnfn convert_char2_sat_rtz(int2);\n" |
| 27574 | "char2 __ovld __cnfn convert_char2_rtp(int2);\n" |
| 27575 | "char2 __ovld __cnfn convert_char2_sat_rtp(int2);\n" |
| 27576 | "char2 __ovld __cnfn convert_char2_rtn(int2);\n" |
| 27577 | "char2 __ovld __cnfn convert_char2_sat_rtn(int2);\n" |
| 27578 | "char2 __ovld __cnfn convert_char2(int2);\n" |
| 27579 | "char2 __ovld __cnfn convert_char2_sat(int2);\n" |
| 27580 | "char2 __ovld __cnfn convert_char2_rte(uint2);\n" |
| 27581 | "char2 __ovld __cnfn convert_char2_sat_rte(uint2);\n" |
| 27582 | "char2 __ovld __cnfn convert_char2_rtz(uint2);\n" |
| 27583 | "char2 __ovld __cnfn convert_char2_sat_rtz(uint2);\n" |
| 27584 | "char2 __ovld __cnfn convert_char2_rtp(uint2);\n" |
| 27585 | "char2 __ovld __cnfn convert_char2_sat_rtp(uint2);\n" |
| 27586 | "char2 __ovld __cnfn convert_char2_rtn(uint2);\n" |
| 27587 | "char2 __ovld __cnfn convert_char2_sat_rtn(uint2);\n" |
| 27588 | "char2 __ovld __cnfn convert_char2(uint2);\n" |
| 27589 | "char2 __ovld __cnfn convert_char2_sat(uint2);\n" |
| 27590 | "char2 __ovld __cnfn convert_char2_rte(long2);\n" |
| 27591 | "char2 __ovld __cnfn convert_char2_sat_rte(long2);\n" |
| 27592 | "char2 __ovld __cnfn convert_char2_rtz(long2);\n" |
| 27593 | "char2 __ovld __cnfn convert_char2_sat_rtz(long2);\n" |
| 27594 | "char2 __ovld __cnfn convert_char2_rtp(long2);\n" |
| 27595 | "char2 __ovld __cnfn convert_char2_sat_rtp(long2);\n" |
| 27596 | "char2 __ovld __cnfn convert_char2_rtn(long2);\n" |
| 27597 | "char2 __ovld __cnfn convert_char2_sat_rtn(long2);\n" |
| 27598 | "char2 __ovld __cnfn convert_char2(long2);\n" |
| 27599 | "char2 __ovld __cnfn convert_char2_sat(long2);\n" |
| 27600 | "char2 __ovld __cnfn convert_char2_rte(ulong2);\n" |
| 27601 | "char2 __ovld __cnfn convert_char2_sat_rte(ulong2);\n" |
| 27602 | "char2 __ovld __cnfn convert_char2_rtz(ulong2);\n" |
| 27603 | "char2 __ovld __cnfn convert_char2_sat_rtz(ulong2);\n" |
| 27604 | "char2 __ovld __cnfn convert_char2_rtp(ulong2);\n" |
| 27605 | "char2 __ovld __cnfn convert_char2_sat_rtp(ulong2);\n" |
| 27606 | "char2 __ovld __cnfn convert_char2_rtn(ulong2);\n" |
| 27607 | "char2 __ovld __cnfn convert_char2_sat_rtn(ulong2);\n" |
| 27608 | "char2 __ovld __cnfn convert_char2(ulong2);\n" |
| 27609 | "char2 __ovld __cnfn convert_char2_sat(ulong2);\n" |
| 27610 | "char2 __ovld __cnfn convert_char2_rte(float2);\n" |
| 27611 | "char2 __ovld __cnfn convert_char2_sat_rte(float2);\n" |
| 27612 | "char2 __ovld __cnfn convert_char2_rtz(float2);\n" |
| 27613 | "char2 __ovld __cnfn convert_char2_sat_rtz(float2);\n" |
| 27614 | "char2 __ovld __cnfn convert_char2_rtp(float2);\n" |
| 27615 | "char2 __ovld __cnfn convert_char2_sat_rtp(float2);\n" |
| 27616 | "char2 __ovld __cnfn convert_char2_rtn(float2);\n" |
| 27617 | "char2 __ovld __cnfn convert_char2_sat_rtn(float2);\n" |
| 27618 | "char2 __ovld __cnfn convert_char2(float2);\n" |
| 27619 | "char2 __ovld __cnfn convert_char2_sat(float2);\n" |
| 27620 | "uchar2 __ovld __cnfn convert_uchar2_rte(char2);\n" |
| 27621 | "uchar2 __ovld __cnfn convert_uchar2_sat_rte(char2);\n" |
| 27622 | "uchar2 __ovld __cnfn convert_uchar2_rtz(char2);\n" |
| 27623 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtz(char2);\n" |
| 27624 | "uchar2 __ovld __cnfn convert_uchar2_rtp(char2);\n" |
| 27625 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtp(char2);\n" |
| 27626 | "uchar2 __ovld __cnfn convert_uchar2_rtn(char2);\n" |
| 27627 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtn(char2);\n" |
| 27628 | "uchar2 __ovld __cnfn convert_uchar2(char2);\n" |
| 27629 | "uchar2 __ovld __cnfn convert_uchar2_sat(char2);\n" |
| 27630 | "uchar2 __ovld __cnfn convert_uchar2_rte(uchar2);\n" |
| 27631 | "uchar2 __ovld __cnfn convert_uchar2_sat_rte(uchar2);\n" |
| 27632 | "uchar2 __ovld __cnfn convert_uchar2_rtz(uchar2);\n" |
| 27633 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtz(uchar2);\n" |
| 27634 | "uchar2 __ovld __cnfn convert_uchar2_rtp(uchar2);\n" |
| 27635 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtp(uchar2);\n" |
| 27636 | "uchar2 __ovld __cnfn convert_uchar2_rtn(uchar2);\n" |
| 27637 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtn(uchar2);\n" |
| 27638 | "uchar2 __ovld __cnfn convert_uchar2(uchar2);\n" |
| 27639 | "uchar2 __ovld __cnfn convert_uchar2_sat(uchar2);\n" |
| 27640 | "uchar2 __ovld __cnfn convert_uchar2_rte(short2);\n" |
| 27641 | "uchar2 __ovld __cnfn convert_uchar2_sat_rte(short2);\n" |
| 27642 | "uchar2 __ovld __cnfn convert_uchar2_rtz(short2);\n" |
| 27643 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtz(short2);\n" |
| 27644 | "uchar2 __ovld __cnfn convert_uchar2_rtp(short2);\n" |
| 27645 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtp(short2);\n" |
| 27646 | "uchar2 __ovld __cnfn convert_uchar2_rtn(short2);\n" |
| 27647 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtn(short2);\n" |
| 27648 | "uchar2 __ovld __cnfn convert_uchar2(short2);\n" |
| 27649 | "uchar2 __ovld __cnfn convert_uchar2_sat(short2);\n" |
| 27650 | "uchar2 __ovld __cnfn convert_uchar2_rte(ushort2);\n" |
| 27651 | "uchar2 __ovld __cnfn convert_uchar2_sat_rte(ushort2);\n" |
| 27652 | "uchar2 __ovld __cnfn convert_uchar2_rtz(ushort2);\n" |
| 27653 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtz(ushort2);\n" |
| 27654 | "uchar2 __ovld __cnfn convert_uchar2_rtp(ushort2);\n" |
| 27655 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtp(ushort2);\n" |
| 27656 | "uchar2 __ovld __cnfn convert_uchar2_rtn(ushort2);\n" |
| 27657 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtn(ushort2);\n" |
| 27658 | "uchar2 __ovld __cnfn convert_uchar2(ushort2);\n" |
| 27659 | "uchar2 __ovld __cnfn convert_uchar2_sat(ushort2);\n" |
| 27660 | "uchar2 __ovld __cnfn convert_uchar2_rte(int2);\n" |
| 27661 | "uchar2 __ovld __cnfn convert_uchar2_sat_rte(int2);\n" |
| 27662 | "uchar2 __ovld __cnfn convert_uchar2_rtz(int2);\n" |
| 27663 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtz(int2);\n" |
| 27664 | "uchar2 __ovld __cnfn convert_uchar2_rtp(int2);\n" |
| 27665 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtp(int2);\n" |
| 27666 | "uchar2 __ovld __cnfn convert_uchar2_rtn(int2);\n" |
| 27667 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtn(int2);\n" |
| 27668 | "uchar2 __ovld __cnfn convert_uchar2(int2);\n" |
| 27669 | "uchar2 __ovld __cnfn convert_uchar2_sat(int2);\n" |
| 27670 | "uchar2 __ovld __cnfn convert_uchar2_rte(uint2);\n" |
| 27671 | "uchar2 __ovld __cnfn convert_uchar2_sat_rte(uint2);\n" |
| 27672 | "uchar2 __ovld __cnfn convert_uchar2_rtz(uint2);\n" |
| 27673 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtz(uint2);\n" |
| 27674 | "uchar2 __ovld __cnfn convert_uchar2_rtp(uint2);\n" |
| 27675 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtp(uint2);\n" |
| 27676 | "uchar2 __ovld __cnfn convert_uchar2_rtn(uint2);\n" |
| 27677 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtn(uint2);\n" |
| 27678 | "uchar2 __ovld __cnfn convert_uchar2(uint2);\n" |
| 27679 | "uchar2 __ovld __cnfn convert_uchar2_sat(uint2);\n" |
| 27680 | "uchar2 __ovld __cnfn convert_uchar2_rte(long2);\n" |
| 27681 | "uchar2 __ovld __cnfn convert_uchar2_sat_rte(long2);\n" |
| 27682 | "uchar2 __ovld __cnfn convert_uchar2_rtz(long2);\n" |
| 27683 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtz(long2);\n" |
| 27684 | "uchar2 __ovld __cnfn convert_uchar2_rtp(long2);\n" |
| 27685 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtp(long2);\n" |
| 27686 | "uchar2 __ovld __cnfn convert_uchar2_rtn(long2);\n" |
| 27687 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtn(long2);\n" |
| 27688 | "uchar2 __ovld __cnfn convert_uchar2(long2);\n" |
| 27689 | "uchar2 __ovld __cnfn convert_uchar2_sat(long2);\n" |
| 27690 | "uchar2 __ovld __cnfn convert_uchar2_rte(ulong2);\n" |
| 27691 | "uchar2 __ovld __cnfn convert_uchar2_sat_rte(ulong2);\n" |
| 27692 | "uchar2 __ovld __cnfn convert_uchar2_rtz(ulong2);\n" |
| 27693 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtz(ulong2);\n" |
| 27694 | "uchar2 __ovld __cnfn convert_uchar2_rtp(ulong2);\n" |
| 27695 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtp(ulong2);\n" |
| 27696 | "uchar2 __ovld __cnfn convert_uchar2_rtn(ulong2);\n" |
| 27697 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtn(ulong2);\n" |
| 27698 | "uchar2 __ovld __cnfn convert_uchar2(ulong2);\n" |
| 27699 | "uchar2 __ovld __cnfn convert_uchar2_sat(ulong2);\n" |
| 27700 | "uchar2 __ovld __cnfn convert_uchar2_rte(float2);\n" |
| 27701 | "uchar2 __ovld __cnfn convert_uchar2_sat_rte(float2);\n" |
| 27702 | "uchar2 __ovld __cnfn convert_uchar2_rtz(float2);\n" |
| 27703 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtz(float2);\n" |
| 27704 | "uchar2 __ovld __cnfn convert_uchar2_rtp(float2);\n" |
| 27705 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtp(float2);\n" |
| 27706 | "uchar2 __ovld __cnfn convert_uchar2_rtn(float2);\n" |
| 27707 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtn(float2);\n" |
| 27708 | "uchar2 __ovld __cnfn convert_uchar2(float2);\n" |
| 27709 | "uchar2 __ovld __cnfn convert_uchar2_sat(float2);\n" |
| 27710 | "short2 __ovld __cnfn convert_short2_rte(char2);\n" |
| 27711 | "short2 __ovld __cnfn convert_short2_sat_rte(char2);\n" |
| 27712 | "short2 __ovld __cnfn convert_short2_rtz(char2);\n" |
| 27713 | "short2 __ovld __cnfn convert_short2_sat_rtz(char2);\n" |
| 27714 | "short2 __ovld __cnfn convert_short2_rtp(char2);\n" |
| 27715 | "short2 __ovld __cnfn convert_short2_sat_rtp(char2);\n" |
| 27716 | "short2 __ovld __cnfn convert_short2_rtn(char2);\n" |
| 27717 | "short2 __ovld __cnfn convert_short2_sat_rtn(char2);\n" |
| 27718 | "short2 __ovld __cnfn convert_short2(char2);\n" |
| 27719 | "short2 __ovld __cnfn convert_short2_sat(char2);\n" |
| 27720 | "short2 __ovld __cnfn convert_short2_rte(uchar2);\n" |
| 27721 | "short2 __ovld __cnfn convert_short2_sat_rte(uchar2);\n" |
| 27722 | "short2 __ovld __cnfn convert_short2_rtz(uchar2);\n" |
| 27723 | "short2 __ovld __cnfn convert_short2_sat_rtz(uchar2);\n" |
| 27724 | "short2 __ovld __cnfn convert_short2_rtp(uchar2);\n" |
| 27725 | "short2 __ovld __cnfn convert_short2_sat_rtp(uchar2);\n" |
| 27726 | "short2 __ovld __cnfn convert_short2_rtn(uchar2);\n" |
| 27727 | "short2 __ovld __cnfn convert_short2_sat_rtn(uchar2);\n" |
| 27728 | "short2 __ovld __cnfn convert_short2(uchar2);\n" |
| 27729 | "short2 __ovld __cnfn convert_short2_sat(uchar2);\n" |
| 27730 | "short2 __ovld __cnfn convert_short2_rte(short2);\n" |
| 27731 | "short2 __ovld __cnfn convert_short2_sat_rte(short2);\n" |
| 27732 | "short2 __ovld __cnfn convert_short2_rtz(short2);\n" |
| 27733 | "short2 __ovld __cnfn convert_short2_sat_rtz(short2);\n" |
| 27734 | "short2 __ovld __cnfn convert_short2_rtp(short2);\n" |
| 27735 | "short2 __ovld __cnfn convert_short2_sat_rtp(short2);\n" |
| 27736 | "short2 __ovld __cnfn convert_short2_rtn(short2);\n" |
| 27737 | "short2 __ovld __cnfn convert_short2_sat_rtn(short2);\n" |
| 27738 | "short2 __ovld __cnfn convert_short2(short2);\n" |
| 27739 | "short2 __ovld __cnfn convert_short2_sat(short2);\n" |
| 27740 | "short2 __ovld __cnfn convert_short2_rte(ushort2);\n" |
| 27741 | "short2 __ovld __cnfn convert_short2_sat_rte(ushort2);\n" |
| 27742 | "short2 __ovld __cnfn convert_short2_rtz(ushort2);\n" |
| 27743 | "short2 __ovld __cnfn convert_short2_sat_rtz(ushort2);\n" |
| 27744 | "short2 __ovld __cnfn convert_short2_rtp(ushort2);\n" |
| 27745 | "short2 __ovld __cnfn convert_short2_sat_rtp(ushort2);\n" |
| 27746 | "short2 __ovld __cnfn convert_short2_rtn(ushort2);\n" |
| 27747 | "short2 __ovld __cnfn convert_short2_sat_rtn(ushort2);\n" |
| 27748 | "short2 __ovld __cnfn convert_short2(ushort2);\n" |
| 27749 | "short2 __ovld __cnfn convert_short2_sat(ushort2);\n" |
| 27750 | "short2 __ovld __cnfn convert_short2_rte(int2);\n" |
| 27751 | "short2 __ovld __cnfn convert_short2_sat_rte(int2);\n" |
| 27752 | "short2 __ovld __cnfn convert_short2_rtz(int2);\n" |
| 27753 | "short2 __ovld __cnfn convert_short2_sat_rtz(int2);\n" |
| 27754 | "short2 __ovld __cnfn convert_short2_rtp(int2);\n" |
| 27755 | "short2 __ovld __cnfn convert_short2_sat_rtp(int2);\n" |
| 27756 | "short2 __ovld __cnfn convert_short2_rtn(int2);\n" |
| 27757 | "short2 __ovld __cnfn convert_short2_sat_rtn(int2);\n" |
| 27758 | "short2 __ovld __cnfn convert_short2(int2);\n" |
| 27759 | "short2 __ovld __cnfn convert_short2_sat(int2);\n" |
| 27760 | "short2 __ovld __cnfn convert_short2_rte(uint2);\n" |
| 27761 | "short2 __ovld __cnfn convert_short2_sat_rte(uint2);\n" |
| 27762 | "short2 __ovld __cnfn convert_short2_rtz(uint2);\n" |
| 27763 | "short2 __ovld __cnfn convert_short2_sat_rtz(uint2);\n" |
| 27764 | "short2 __ovld __cnfn convert_short2_rtp(uint2);\n" |
| 27765 | "short2 __ovld __cnfn convert_short2_sat_rtp(uint2);\n" |
| 27766 | "short2 __ovld __cnfn convert_short2_rtn(uint2);\n" |
| 27767 | "short2 __ovld __cnfn convert_short2_sat_rtn(uint2);\n" |
| 27768 | "short2 __ovld __cnfn convert_short2(uint2);\n" |
| 27769 | "short2 __ovld __cnfn convert_short2_sat(uint2);\n" |
| 27770 | "short2 __ovld __cnfn convert_short2_rte(long2);\n" |
| 27771 | "short2 __ovld __cnfn convert_short2_sat_rte(long2);\n" |
| 27772 | "short2 __ovld __cnfn convert_short2_rtz(long2);\n" |
| 27773 | "short2 __ovld __cnfn convert_short2_sat_rtz(long2);\n" |
| 27774 | "short2 __ovld __cnfn convert_short2_rtp(long2);\n" |
| 27775 | "short2 __ovld __cnfn convert_short2_sat_rtp(long2);\n" |
| 27776 | "short2 __ovld __cnfn convert_short2_rtn(long2);\n" |
| 27777 | "short2 __ovld __cnfn convert_short2_sat_rtn(long2);\n" |
| 27778 | "short2 __ovld __cnfn convert_short2(long2);\n" |
| 27779 | "short2 __ovld __cnfn convert_short2_sat(long2);\n" |
| 27780 | "short2 __ovld __cnfn convert_short2_rte(ulong2);\n" |
| 27781 | "short2 __ovld __cnfn convert_short2_sat_rte(ulong2);\n" |
| 27782 | "short2 __ovld __cnfn convert_short2_rtz(ulong2);\n" |
| 27783 | "short2 __ovld __cnfn convert_short2_sat_rtz(ulong2);\n" |
| 27784 | "short2 __ovld __cnfn convert_short2_rtp(ulong2);\n" |
| 27785 | "short2 __ovld __cnfn convert_short2_sat_rtp(ulong2);\n" |
| 27786 | "short2 __ovld __cnfn convert_short2_rtn(ulong2);\n" |
| 27787 | "short2 __ovld __cnfn convert_short2_sat_rtn(ulong2);\n" |
| 27788 | "short2 __ovld __cnfn convert_short2(ulong2);\n" |
| 27789 | "short2 __ovld __cnfn convert_short2_sat(ulong2);\n" |
| 27790 | "short2 __ovld __cnfn convert_short2_rte(float2);\n" |
| 27791 | "short2 __ovld __cnfn convert_short2_sat_rte(float2);\n" |
| 27792 | "short2 __ovld __cnfn convert_short2_rtz(float2);\n" |
| 27793 | "short2 __ovld __cnfn convert_short2_sat_rtz(float2);\n" |
| 27794 | "short2 __ovld __cnfn convert_short2_rtp(float2);\n" |
| 27795 | "short2 __ovld __cnfn convert_short2_sat_rtp(float2);\n" |
| 27796 | "short2 __ovld __cnfn convert_short2_rtn(float2);\n" |
| 27797 | "short2 __ovld __cnfn convert_short2_sat_rtn(float2);\n" |
| 27798 | "short2 __ovld __cnfn convert_short2(float2);\n" |
| 27799 | "short2 __ovld __cnfn convert_short2_sat(float2);\n" |
| 27800 | "ushort2 __ovld __cnfn convert_ushort2_rte(char2);\n" |
| 27801 | "ushort2 __ovld __cnfn convert_ushort2_sat_rte(char2);\n" |
| 27802 | "ushort2 __ovld __cnfn convert_ushort2_rtz(char2);\n" |
| 27803 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtz(char2);\n" |
| 27804 | "ushort2 __ovld __cnfn convert_ushort2_rtp(char2);\n" |
| 27805 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtp(char2);\n" |
| 27806 | "ushort2 __ovld __cnfn convert_ushort2_rtn(char2);\n" |
| 27807 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtn(char2);\n" |
| 27808 | "ushort2 __ovld __cnfn convert_ushort2(char2);\n" |
| 27809 | "ushort2 __ovld __cnfn convert_ushort2_sat(char2);\n" |
| 27810 | "ushort2 __ovld __cnfn convert_ushort2_rte(uchar2);\n" |
| 27811 | "ushort2 __ovld __cnfn convert_ushort2_sat_rte(uchar2);\n" |
| 27812 | "ushort2 __ovld __cnfn convert_ushort2_rtz(uchar2);\n" |
| 27813 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtz(uchar2);\n" |
| 27814 | "ushort2 __ovld __cnfn convert_ushort2_rtp(uchar2);\n" |
| 27815 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtp(uchar2);\n" |
| 27816 | "ushort2 __ovld __cnfn convert_ushort2_rtn(uchar2);\n" |
| 27817 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtn(uchar2);\n" |
| 27818 | "ushort2 __ovld __cnfn convert_ushort2(uchar2);\n" |
| 27819 | "ushort2 __ovld __cnfn convert_ushort2_sat(uchar2);\n" |
| 27820 | "ushort2 __ovld __cnfn convert_ushort2_rte(short2);\n" |
| 27821 | "ushort2 __ovld __cnfn convert_ushort2_sat_rte(short2);\n" |
| 27822 | "ushort2 __ovld __cnfn convert_ushort2_rtz(short2);\n" |
| 27823 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtz(short2);\n" |
| 27824 | "ushort2 __ovld __cnfn convert_ushort2_rtp(short2);\n" |
| 27825 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtp(short2);\n" |
| 27826 | "ushort2 __ovld __cnfn convert_ushort2_rtn(short2);\n" |
| 27827 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtn(short2);\n" |
| 27828 | "ushort2 __ovld __cnfn convert_ushort2(short2);\n" |
| 27829 | "ushort2 __ovld __cnfn convert_ushort2_sat(short2);\n" |
| 27830 | "ushort2 __ovld __cnfn convert_ushort2_rte(ushort2);\n" |
| 27831 | "ushort2 __ovld __cnfn convert_ushort2_sat_rte(ushort2);\n" |
| 27832 | "ushort2 __ovld __cnfn convert_ushort2_rtz(ushort2);\n" |
| 27833 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtz(ushort2);\n" |
| 27834 | "ushort2 __ovld __cnfn convert_ushort2_rtp(ushort2);\n" |
| 27835 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtp(ushort2);\n" |
| 27836 | "ushort2 __ovld __cnfn convert_ushort2_rtn(ushort2);\n" |
| 27837 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtn(ushort2);\n" |
| 27838 | "ushort2 __ovld __cnfn convert_ushort2(ushort2);\n" |
| 27839 | "ushort2 __ovld __cnfn convert_ushort2_sat(ushort2);\n" |
| 27840 | "ushort2 __ovld __cnfn convert_ushort2_rte(int2);\n" |
| 27841 | "ushort2 __ovld __cnfn convert_ushort2_sat_rte(int2);\n" |
| 27842 | "ushort2 __ovld __cnfn convert_ushort2_rtz(int2);\n" |
| 27843 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtz(int2);\n" |
| 27844 | "ushort2 __ovld __cnfn convert_ushort2_rtp(int2);\n" |
| 27845 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtp(int2);\n" |
| 27846 | "ushort2 __ovld __cnfn convert_ushort2_rtn(int2);\n" |
| 27847 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtn(int2);\n" |
| 27848 | "ushort2 __ovld __cnfn convert_ushort2(int2);\n" |
| 27849 | "ushort2 __ovld __cnfn convert_ushort2_sat(int2);\n" |
| 27850 | "ushort2 __ovld __cnfn convert_ushort2_rte(uint2);\n" |
| 27851 | "ushort2 __ovld __cnfn convert_ushort2_sat_rte(uint2);\n" |
| 27852 | "ushort2 __ovld __cnfn convert_ushort2_rtz(uint2);\n" |
| 27853 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtz(uint2);\n" |
| 27854 | "ushort2 __ovld __cnfn convert_ushort2_rtp(uint2);\n" |
| 27855 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtp(uint2);\n" |
| 27856 | "ushort2 __ovld __cnfn convert_ushort2_rtn(uint2);\n" |
| 27857 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtn(uint2);\n" |
| 27858 | "ushort2 __ovld __cnfn convert_ushort2(uint2);\n" |
| 27859 | "ushort2 __ovld __cnfn convert_ushort2_sat(uint2);\n" |
| 27860 | "ushort2 __ovld __cnfn convert_ushort2_rte(long2);\n" |
| 27861 | "ushort2 __ovld __cnfn convert_ushort2_sat_rte(long2);\n" |
| 27862 | "ushort2 __ovld __cnfn convert_ushort2_rtz(long2);\n" |
| 27863 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtz(long2);\n" |
| 27864 | "ushort2 __ovld __cnfn convert_ushort2_rtp(long2);\n" |
| 27865 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtp(long2);\n" |
| 27866 | "ushort2 __ovld __cnfn convert_ushort2_rtn(long2);\n" |
| 27867 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtn(long2);\n" |
| 27868 | "ushort2 __ovld __cnfn convert_ushort2(long2);\n" |
| 27869 | "ushort2 __ovld __cnfn convert_ushort2_sat(long2);\n" |
| 27870 | "ushort2 __ovld __cnfn convert_ushort2_rte(ulong2);\n" |
| 27871 | "ushort2 __ovld __cnfn convert_ushort2_sat_rte(ulong2);\n" |
| 27872 | "ushort2 __ovld __cnfn convert_ushort2_rtz(ulong2);\n" |
| 27873 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtz(ulong2);\n" |
| 27874 | "ushort2 __ovld __cnfn convert_ushort2_rtp(ulong2);\n" |
| 27875 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtp(ulong2);\n" |
| 27876 | "ushort2 __ovld __cnfn convert_ushort2_rtn(ulong2);\n" |
| 27877 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtn(ulong2);\n" |
| 27878 | "ushort2 __ovld __cnfn convert_ushort2(ulong2);\n" |
| 27879 | "ushort2 __ovld __cnfn convert_ushort2_sat(ulong2);\n" |
| 27880 | "ushort2 __ovld __cnfn convert_ushort2_rte(float2);\n" |
| 27881 | "ushort2 __ovld __cnfn convert_ushort2_sat_rte(float2);\n" |
| 27882 | "ushort2 __ovld __cnfn convert_ushort2_rtz(float2);\n" |
| 27883 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtz(float2);\n" |
| 27884 | "ushort2 __ovld __cnfn convert_ushort2_rtp(float2);\n" |
| 27885 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtp(float2);\n" |
| 27886 | "ushort2 __ovld __cnfn convert_ushort2_rtn(float2);\n" |
| 27887 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtn(float2);\n" |
| 27888 | "ushort2 __ovld __cnfn convert_ushort2(float2);\n" |
| 27889 | "ushort2 __ovld __cnfn convert_ushort2_sat(float2);\n" |
| 27890 | "int2 __ovld __cnfn convert_int2_rte(char2);\n" |
| 27891 | "int2 __ovld __cnfn convert_int2_sat_rte(char2);\n" |
| 27892 | "int2 __ovld __cnfn convert_int2_rtz(char2);\n" |
| 27893 | "int2 __ovld __cnfn convert_int2_sat_rtz(char2);\n" |
| 27894 | "int2 __ovld __cnfn convert_int2_rtp(char2);\n" |
| 27895 | "int2 __ovld __cnfn convert_int2_sat_rtp(char2);\n" |
| 27896 | "int2 __ovld __cnfn convert_int2_rtn(char2);\n" |
| 27897 | "int2 __ovld __cnfn convert_int2_sat_rtn(char2);\n" |
| 27898 | "int2 __ovld __cnfn convert_int2(char2);\n" |
| 27899 | "int2 __ovld __cnfn convert_int2_sat(char2);\n" |
| 27900 | "int2 __ovld __cnfn convert_int2_rte(uchar2);\n" |
| 27901 | "int2 __ovld __cnfn convert_int2_sat_rte(uchar2);\n" |
| 27902 | "int2 __ovld __cnfn convert_int2_rtz(uchar2);\n" |
| 27903 | "int2 __ovld __cnfn convert_int2_sat_rtz(uchar2);\n" |
| 27904 | "int2 __ovld __cnfn convert_int2_rtp(uchar2);\n" |
| 27905 | "int2 __ovld __cnfn convert_int2_sat_rtp(uchar2);\n" |
| 27906 | "int2 __ovld __cnfn convert_int2_rtn(uchar2);\n" |
| 27907 | "int2 __ovld __cnfn convert_int2_sat_rtn(uchar2);\n" |
| 27908 | "int2 __ovld __cnfn convert_int2(uchar2);\n" |
| 27909 | "int2 __ovld __cnfn convert_int2_sat(uchar2);\n" |
| 27910 | "int2 __ovld __cnfn convert_int2_rte(short2);\n" |
| 27911 | "int2 __ovld __cnfn convert_int2_sat_rte(short2);\n" |
| 27912 | "int2 __ovld __cnfn convert_int2_rtz(short2);\n" |
| 27913 | "int2 __ovld __cnfn convert_int2_sat_rtz(short2);\n" |
| 27914 | "int2 __ovld __cnfn convert_int2_rtp(short2);\n" |
| 27915 | "int2 __ovld __cnfn convert_int2_sat_rtp(short2);\n" |
| 27916 | "int2 __ovld __cnfn convert_int2_rtn(short2);\n" |
| 27917 | "int2 __ovld __cnfn convert_int2_sat_rtn(short2);\n" |
| 27918 | "int2 __ovld __cnfn convert_int2(short2);\n" |
| 27919 | "int2 __ovld __cnfn convert_int2_sat(short2);\n" |
| 27920 | "int2 __ovld __cnfn convert_int2_rte(ushort2);\n" |
| 27921 | "int2 __ovld __cnfn convert_int2_sat_rte(ushort2);\n" |
| 27922 | "int2 __ovld __cnfn convert_int2_rtz(ushort2);\n" |
| 27923 | "int2 __ovld __cnfn convert_int2_sat_rtz(ushort2);\n" |
| 27924 | "int2 __ovld __cnfn convert_int2_rtp(ushort2);\n" |
| 27925 | "int2 __ovld __cnfn convert_int2_sat_rtp(ushort2);\n" |
| 27926 | "int2 __ovld __cnfn convert_int2_rtn(ushort2);\n" |
| 27927 | "int2 __ovld __cnfn convert_int2_sat_rtn(ushort2);\n" |
| 27928 | "int2 __ovld __cnfn convert_int2(ushort2);\n" |
| 27929 | "int2 __ovld __cnfn convert_int2_sat(ushort2);\n" |
| 27930 | "int2 __ovld __cnfn convert_int2_rte(int2);\n" |
| 27931 | "int2 __ovld __cnfn convert_int2_sat_rte(int2);\n" |
| 27932 | "int2 __ovld __cnfn convert_int2_rtz(int2);\n" |
| 27933 | "int2 __ovld __cnfn convert_int2_sat_rtz(int2);\n" |
| 27934 | "int2 __ovld __cnfn convert_int2_rtp(int2);\n" |
| 27935 | "int2 __ovld __cnfn convert_int2_sat_rtp(int2);\n" |
| 27936 | "int2 __ovld __cnfn convert_int2_rtn(int2);\n" |
| 27937 | "int2 __ovld __cnfn convert_int2_sat_rtn(int2);\n" |
| 27938 | "int2 __ovld __cnfn convert_int2(int2);\n" |
| 27939 | "int2 __ovld __cnfn convert_int2_sat(int2);\n" |
| 27940 | "int2 __ovld __cnfn convert_int2_rte(uint2);\n" |
| 27941 | "int2 __ovld __cnfn convert_int2_sat_rte(uint2);\n" |
| 27942 | "int2 __ovld __cnfn convert_int2_rtz(uint2);\n" |
| 27943 | "int2 __ovld __cnfn convert_int2_sat_rtz(uint2);\n" |
| 27944 | "int2 __ovld __cnfn convert_int2_rtp(uint2);\n" |
| 27945 | "int2 __ovld __cnfn convert_int2_sat_rtp(uint2);\n" |
| 27946 | "int2 __ovld __cnfn convert_int2_rtn(uint2);\n" |
| 27947 | "int2 __ovld __cnfn convert_int2_sat_rtn(uint2);\n" |
| 27948 | "int2 __ovld __cnfn convert_int2(uint2);\n" |
| 27949 | "int2 __ovld __cnfn convert_int2_sat(uint2);\n" |
| 27950 | "int2 __ovld __cnfn convert_int2_rte(long2);\n" |
| 27951 | "int2 __ovld __cnfn convert_int2_sat_rte(long2);\n" |
| 27952 | "int2 __ovld __cnfn convert_int2_rtz(long2);\n" |
| 27953 | "int2 __ovld __cnfn convert_int2_sat_rtz(long2);\n" |
| 27954 | "int2 __ovld __cnfn convert_int2_rtp(long2);\n" |
| 27955 | "int2 __ovld __cnfn convert_int2_sat_rtp(long2);\n" |
| 27956 | "int2 __ovld __cnfn convert_int2_rtn(long2);\n" |
| 27957 | "int2 __ovld __cnfn convert_int2_sat_rtn(long2);\n" |
| 27958 | "int2 __ovld __cnfn convert_int2(long2);\n" |
| 27959 | "int2 __ovld __cnfn convert_int2_sat(long2);\n" |
| 27960 | "int2 __ovld __cnfn convert_int2_rte(ulong2);\n" |
| 27961 | "int2 __ovld __cnfn convert_int2_sat_rte(ulong2);\n" |
| 27962 | "int2 __ovld __cnfn convert_int2_rtz(ulong2);\n" |
| 27963 | "int2 __ovld __cnfn convert_int2_sat_rtz(ulong2);\n" |
| 27964 | "int2 __ovld __cnfn convert_int2_rtp(ulong2);\n" |
| 27965 | "int2 __ovld __cnfn convert_int2_sat_rtp(ulong2);\n" |
| 27966 | "int2 __ovld __cnfn convert_int2_rtn(ulong2);\n" |
| 27967 | "int2 __ovld __cnfn convert_int2_sat_rtn(ulong2);\n" |
| 27968 | "int2 __ovld __cnfn convert_int2(ulong2);\n" |
| 27969 | "int2 __ovld __cnfn convert_int2_sat(ulong2);\n" |
| 27970 | "int2 __ovld __cnfn convert_int2_rte(float2);\n" |
| 27971 | "int2 __ovld __cnfn convert_int2_sat_rte(float2);\n" |
| 27972 | "int2 __ovld __cnfn convert_int2_rtz(float2);\n" |
| 27973 | "int2 __ovld __cnfn convert_int2_sat_rtz(float2);\n" |
| 27974 | "int2 __ovld __cnfn convert_int2_rtp(float2);\n" |
| 27975 | "int2 __ovld __cnfn convert_int2_sat_rtp(float2);\n" |
| 27976 | "int2 __ovld __cnfn convert_int2_rtn(float2);\n" |
| 27977 | "int2 __ovld __cnfn convert_int2_sat_rtn(float2);\n" |
| 27978 | "int2 __ovld __cnfn convert_int2(float2);\n" |
| 27979 | "int2 __ovld __cnfn convert_int2_sat(float2);\n" |
| 27980 | "uint2 __ovld __cnfn convert_uint2_rte(char2);\n" |
| 27981 | "uint2 __ovld __cnfn convert_uint2_sat_rte(char2);\n" |
| 27982 | "uint2 __ovld __cnfn convert_uint2_rtz(char2);\n" |
| 27983 | "uint2 __ovld __cnfn convert_uint2_sat_rtz(char2);\n" |
| 27984 | "uint2 __ovld __cnfn convert_uint2_rtp(char2);\n" |
| 27985 | "uint2 __ovld __cnfn convert_uint2_sat_rtp(char2);\n" |
| 27986 | "uint2 __ovld __cnfn convert_uint2_rtn(char2);\n" |
| 27987 | "uint2 __ovld __cnfn convert_uint2_sat_rtn(char2);\n" |
| 27988 | "uint2 __ovld __cnfn convert_uint2(char2);\n" |
| 27989 | "uint2 __ovld __cnfn convert_uint2_sat(char2);\n" |
| 27990 | "uint2 __ovld __cnfn convert_uint2_rte(uchar2);\n" |
| 27991 | "uint2 __ovld __cnfn convert_uint2_sat_rte(uchar2);\n" |
| 27992 | "uint2 __ovld __cnfn convert_uint2_rtz(uchar2);\n" |
| 27993 | "uint2 __ovld __cnfn convert_uint2_sat_rtz(uchar2);\n" |
| 27994 | "uint2 __ovld __cnfn convert_uint2_rtp(uchar2);\n" |
| 27995 | "uint2 __ovld __cnfn convert_uint2_sat_rtp(uchar2);\n" |
| 27996 | "uint2 __ovld __cnfn convert_uint2_rtn(uchar2);\n" |
| 27997 | "uint2 __ovld __cnfn convert_uint2_sat_rtn(uchar2);\n" |
| 27998 | "uint2 __ovld __cnfn convert_uint2(uchar2);\n" |
| 27999 | "uint2 __ovld __cnfn convert_uint2_sat(uchar2);\n" |
| 28000 | "uint2 __ovld __cnfn convert_uint2_rte(short2);\n" |
| 28001 | "uint2 __ovld __cnfn convert_uint2_sat_rte(short2);\n" |
| 28002 | "uint2 __ovld __cnfn convert_uint2_rtz(short2);\n" |
| 28003 | "uint2 __ovld __cnfn convert_uint2_sat_rtz(short2);\n" |
| 28004 | "uint2 __ovld __cnfn convert_uint2_rtp(short2);\n" |
| 28005 | "uint2 __ovld __cnfn convert_uint2_sat_rtp(short2);\n" |
| 28006 | "uint2 __ovld __cnfn convert_uint2_rtn(short2);\n" |
| 28007 | "uint2 __ovld __cnfn convert_uint2_sat_rtn(short2);\n" |
| 28008 | "uint2 __ovld __cnfn convert_uint2(short2);\n" |
| 28009 | "uint2 __ovld __cnfn convert_uint2_sat(short2);\n" |
| 28010 | "uint2 __ovld __cnfn convert_uint2_rte(ushort2);\n" |
| 28011 | "uint2 __ovld __cnfn convert_uint2_sat_rte(ushort2);\n" |
| 28012 | "uint2 __ovld __cnfn convert_uint2_rtz(ushort2);\n" |
| 28013 | "uint2 __ovld __cnfn convert_uint2_sat_rtz(ushort2);\n" |
| 28014 | "uint2 __ovld __cnfn convert_uint2_rtp(ushort2);\n" |
| 28015 | "uint2 __ovld __cnfn convert_uint2_sat_rtp(ushort2);\n" |
| 28016 | "uint2 __ovld __cnfn convert_uint2_rtn(ushort2);\n" |
| 28017 | "uint2 __ovld __cnfn convert_uint2_sat_rtn(ushort2);\n" |
| 28018 | "uint2 __ovld __cnfn convert_uint2(ushort2);\n" |
| 28019 | "uint2 __ovld __cnfn convert_uint2_sat(ushort2);\n" |
| 28020 | "uint2 __ovld __cnfn convert_uint2_rte(int2);\n" |
| 28021 | "uint2 __ovld __cnfn convert_uint2_sat_rte(int2);\n" |
| 28022 | "uint2 __ovld __cnfn convert_uint2_rtz(int2);\n" |
| 28023 | "uint2 __ovld __cnfn convert_uint2_sat_rtz(int2);\n" |
| 28024 | "uint2 __ovld __cnfn convert_uint2_rtp(int2);\n" |
| 28025 | "uint2 __ovld __cnfn convert_uint2_sat_rtp(int2);\n" |
| 28026 | "uint2 __ovld __cnfn convert_uint2_rtn(int2);\n" |
| 28027 | "uint2 __ovld __cnfn convert_uint2_sat_rtn(int2);\n" |
| 28028 | "uint2 __ovld __cnfn convert_uint2(int2);\n" |
| 28029 | "uint2 __ovld __cnfn convert_uint2_sat(int2);\n" |
| 28030 | "uint2 __ovld __cnfn convert_uint2_rte(uint2);\n" |
| 28031 | "uint2 __ovld __cnfn convert_uint2_sat_rte(uint2);\n" |
| 28032 | "uint2 __ovld __cnfn convert_uint2_rtz(uint2);\n" |
| 28033 | "uint2 __ovld __cnfn convert_uint2_sat_rtz(uint2);\n" |
| 28034 | "uint2 __ovld __cnfn convert_uint2_rtp(uint2);\n" |
| 28035 | "uint2 __ovld __cnfn convert_uint2_sat_rtp(uint2);\n" |
| 28036 | "uint2 __ovld __cnfn convert_uint2_rtn(uint2);\n" |
| 28037 | "uint2 __ovld __cnfn convert_uint2_sat_rtn(uint2);\n" |
| 28038 | "uint2 __ovld __cnfn convert_uint2(uint2);\n" |
| 28039 | "uint2 __ovld __cnfn convert_uint2_sat(uint2);\n" |
| 28040 | "uint2 __ovld __cnfn convert_uint2_rte(long2);\n" |
| 28041 | "uint2 __ovld __cnfn convert_uint2_sat_rte(long2);\n" |
| 28042 | "uint2 __ovld __cnfn convert_uint2_rtz(long2);\n" |
| 28043 | "uint2 __ovld __cnfn convert_uint2_sat_rtz(long2);\n" |
| 28044 | "uint2 __ovld __cnfn convert_uint2_rtp(long2);\n" |
| 28045 | "uint2 __ovld __cnfn convert_uint2_sat_rtp(long2);\n" |
| 28046 | "uint2 __ovld __cnfn convert_uint2_rtn(long2);\n" |
| 28047 | "uint2 __ovld __cnfn convert_uint2_sat_rtn(long2);\n" |
| 28048 | "uint2 __ovld __cnfn convert_uint2(long2);\n" |
| 28049 | "uint2 __ovld __cnfn convert_uint2_sat(long2);\n" |
| 28050 | "uint2 __ovld __cnfn convert_uint2_rte(ulong2);\n" |
| 28051 | "uint2 __ovld __cnfn convert_uint2_sat_rte(ulong2);\n" |
| 28052 | "uint2 __ovld __cnfn convert_uint2_rtz(ulong2);\n" |
| 28053 | "uint2 __ovld __cnfn convert_uint2_sat_rtz(ulong2);\n" |
| 28054 | "uint2 __ovld __cnfn convert_uint2_rtp(ulong2);\n" |
| 28055 | "uint2 __ovld __cnfn convert_uint2_sat_rtp(ulong2);\n" |
| 28056 | "uint2 __ovld __cnfn convert_uint2_rtn(ulong2);\n" |
| 28057 | "uint2 __ovld __cnfn convert_uint2_sat_rtn(ulong2);\n" |
| 28058 | "uint2 __ovld __cnfn convert_uint2(ulong2);\n" |
| 28059 | "uint2 __ovld __cnfn convert_uint2_sat(ulong2);\n" |
| 28060 | "uint2 __ovld __cnfn convert_uint2_rte(float2);\n" |
| 28061 | "uint2 __ovld __cnfn convert_uint2_sat_rte(float2);\n" |
| 28062 | "uint2 __ovld __cnfn convert_uint2_rtz(float2);\n" |
| 28063 | "uint2 __ovld __cnfn convert_uint2_sat_rtz(float2);\n" |
| 28064 | "uint2 __ovld __cnfn convert_uint2_rtp(float2);\n" |
| 28065 | "uint2 __ovld __cnfn convert_uint2_sat_rtp(float2);\n" |
| 28066 | "uint2 __ovld __cnfn convert_uint2_rtn(float2);\n" |
| 28067 | "uint2 __ovld __cnfn convert_uint2_sat_rtn(float2);\n" |
| 28068 | "uint2 __ovld __cnfn convert_uint2(float2);\n" |
| 28069 | "uint2 __ovld __cnfn convert_uint2_sat(float2);\n" |
| 28070 | "long2 __ovld __cnfn convert_long2_rte(char2);\n" |
| 28071 | "long2 __ovld __cnfn convert_long2_sat_rte(char2);\n" |
| 28072 | "long2 __ovld __cnfn convert_long2_rtz(char2);\n" |
| 28073 | "long2 __ovld __cnfn convert_long2_sat_rtz(char2);\n" |
| 28074 | "long2 __ovld __cnfn convert_long2_rtp(char2);\n" |
| 28075 | "long2 __ovld __cnfn convert_long2_sat_rtp(char2);\n" |
| 28076 | "long2 __ovld __cnfn convert_long2_rtn(char2);\n" |
| 28077 | "long2 __ovld __cnfn convert_long2_sat_rtn(char2);\n" |
| 28078 | "long2 __ovld __cnfn convert_long2(char2);\n" |
| 28079 | "long2 __ovld __cnfn convert_long2_sat(char2);\n" |
| 28080 | "long2 __ovld __cnfn convert_long2_rte(uchar2);\n" |
| 28081 | "long2 __ovld __cnfn convert_long2_sat_rte(uchar2);\n" |
| 28082 | "long2 __ovld __cnfn convert_long2_rtz(uchar2);\n" |
| 28083 | "long2 __ovld __cnfn convert_long2_sat_rtz(uchar2);\n" |
| 28084 | "long2 __ovld __cnfn convert_long2_rtp(uchar2);\n" |
| 28085 | "long2 __ovld __cnfn convert_long2_sat_rtp(uchar2);\n" |
| 28086 | "long2 __ovld __cnfn convert_long2_rtn(uchar2);\n" |
| 28087 | "long2 __ovld __cnfn convert_long2_sat_rtn(uchar2);\n" |
| 28088 | "long2 __ovld __cnfn convert_long2(uchar2);\n" |
| 28089 | "long2 __ovld __cnfn convert_long2_sat(uchar2);\n" |
| 28090 | "long2 __ovld __cnfn convert_long2_rte(short2);\n" |
| 28091 | "long2 __ovld __cnfn convert_long2_sat_rte(short2);\n" |
| 28092 | "long2 __ovld __cnfn convert_long2_rtz(short2);\n" |
| 28093 | "long2 __ovld __cnfn convert_long2_sat_rtz(short2);\n" |
| 28094 | "long2 __ovld __cnfn convert_long2_rtp(short2);\n" |
| 28095 | "long2 __ovld __cnfn convert_long2_sat_rtp(short2);\n" |
| 28096 | "long2 __ovld __cnfn convert_long2_rtn(short2);\n" |
| 28097 | "long2 __ovld __cnfn convert_long2_sat_rtn(short2);\n" |
| 28098 | "long2 __ovld __cnfn convert_long2(short2);\n" |
| 28099 | "long2 __ovld __cnfn convert_long2_sat(short2);\n" |
| 28100 | "long2 __ovld __cnfn convert_long2_rte(ushort2);\n" |
| 28101 | "long2 __ovld __cnfn convert_long2_sat_rte(ushort2);\n" |
| 28102 | "long2 __ovld __cnfn convert_long2_rtz(ushort2);\n" |
| 28103 | "long2 __ovld __cnfn convert_long2_sat_rtz(ushort2);\n" |
| 28104 | "long2 __ovld __cnfn convert_long2_rtp(ushort2);\n" |
| 28105 | "long2 __ovld __cnfn convert_long2_sat_rtp(ushort2);\n" |
| 28106 | "long2 __ovld __cnfn convert_long2_rtn(ushort2);\n" |
| 28107 | "long2 __ovld __cnfn convert_long2_sat_rtn(ushort2);\n" |
| 28108 | "long2 __ovld __cnfn convert_long2(ushort2);\n" |
| 28109 | "long2 __ovld __cnfn convert_long2_sat(ushort2);\n" |
| 28110 | "long2 __ovld __cnfn convert_long2_rte(int2);\n" |
| 28111 | "long2 __ovld __cnfn convert_long2_sat_rte(int2);\n" |
| 28112 | "long2 __ovld __cnfn convert_long2_rtz(int2);\n" |
| 28113 | "long2 __ovld __cnfn convert_long2_sat_rtz(int2);\n" |
| 28114 | "long2 __ovld __cnfn convert_long2_rtp(int2);\n" |
| 28115 | "long2 __ovld __cnfn convert_long2_sat_rtp(int2);\n" |
| 28116 | "long2 __ovld __cnfn convert_long2_rtn(int2);\n" |
| 28117 | "long2 __ovld __cnfn convert_long2_sat_rtn(int2);\n" |
| 28118 | "long2 __ovld __cnfn convert_long2(int2);\n" |
| 28119 | "long2 __ovld __cnfn convert_long2_sat(int2);\n" |
| 28120 | "long2 __ovld __cnfn convert_long2_rte(uint2);\n" |
| 28121 | "long2 __ovld __cnfn convert_long2_sat_rte(uint2);\n" |
| 28122 | "long2 __ovld __cnfn convert_long2_rtz(uint2);\n" |
| 28123 | "long2 __ovld __cnfn convert_long2_sat_rtz(uint2);\n" |
| 28124 | "long2 __ovld __cnfn convert_long2_rtp(uint2);\n" |
| 28125 | "long2 __ovld __cnfn convert_long2_sat_rtp(uint2);\n" |
| 28126 | "long2 __ovld __cnfn convert_long2_rtn(uint2);\n" |
| 28127 | "long2 __ovld __cnfn convert_long2_sat_rtn(uint2);\n" |
| 28128 | "long2 __ovld __cnfn convert_long2(uint2);\n" |
| 28129 | "long2 __ovld __cnfn convert_long2_sat(uint2);\n" |
| 28130 | "long2 __ovld __cnfn convert_long2_rte(long2);\n" |
| 28131 | "long2 __ovld __cnfn convert_long2_sat_rte(long2);\n" |
| 28132 | "long2 __ovld __cnfn convert_long2_rtz(long2);\n" |
| 28133 | "long2 __ovld __cnfn convert_long2_sat_rtz(long2);\n" |
| 28134 | "long2 __ovld __cnfn convert_long2_rtp(long2);\n" |
| 28135 | "long2 __ovld __cnfn convert_long2_sat_rtp(long2);\n" |
| 28136 | "long2 __ovld __cnfn convert_long2_rtn(long2);\n" |
| 28137 | "long2 __ovld __cnfn convert_long2_sat_rtn(long2);\n" |
| 28138 | "long2 __ovld __cnfn convert_long2(long2);\n" |
| 28139 | "long2 __ovld __cnfn convert_long2_sat(long2);\n" |
| 28140 | "long2 __ovld __cnfn convert_long2_rte(ulong2);\n" |
| 28141 | "long2 __ovld __cnfn convert_long2_sat_rte(ulong2);\n" |
| 28142 | "long2 __ovld __cnfn convert_long2_rtz(ulong2);\n" |
| 28143 | "long2 __ovld __cnfn convert_long2_sat_rtz(ulong2);\n" |
| 28144 | "long2 __ovld __cnfn convert_long2_rtp(ulong2);\n" |
| 28145 | "long2 __ovld __cnfn convert_long2_sat_rtp(ulong2);\n" |
| 28146 | "long2 __ovld __cnfn convert_long2_rtn(ulong2);\n" |
| 28147 | "long2 __ovld __cnfn convert_long2_sat_rtn(ulong2);\n" |
| 28148 | "long2 __ovld __cnfn convert_long2(ulong2);\n" |
| 28149 | "long2 __ovld __cnfn convert_long2_sat(ulong2);\n" |
| 28150 | "long2 __ovld __cnfn convert_long2_rte(float2);\n" |
| 28151 | "long2 __ovld __cnfn convert_long2_sat_rte(float2);\n" |
| 28152 | "long2 __ovld __cnfn convert_long2_rtz(float2);\n" |
| 28153 | "long2 __ovld __cnfn convert_long2_sat_rtz(float2);\n" |
| 28154 | "long2 __ovld __cnfn convert_long2_rtp(float2);\n" |
| 28155 | "long2 __ovld __cnfn convert_long2_sat_rtp(float2);\n" |
| 28156 | "long2 __ovld __cnfn convert_long2_rtn(float2);\n" |
| 28157 | "long2 __ovld __cnfn convert_long2_sat_rtn(float2);\n" |
| 28158 | "long2 __ovld __cnfn convert_long2(float2);\n" |
| 28159 | "long2 __ovld __cnfn convert_long2_sat(float2);\n" |
| 28160 | "ulong2 __ovld __cnfn convert_ulong2_rte(char2);\n" |
| 28161 | "ulong2 __ovld __cnfn convert_ulong2_sat_rte(char2);\n" |
| 28162 | "ulong2 __ovld __cnfn convert_ulong2_rtz(char2);\n" |
| 28163 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtz(char2);\n" |
| 28164 | "ulong2 __ovld __cnfn convert_ulong2_rtp(char2);\n" |
| 28165 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtp(char2);\n" |
| 28166 | "ulong2 __ovld __cnfn convert_ulong2_rtn(char2);\n" |
| 28167 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtn(char2);\n" |
| 28168 | "ulong2 __ovld __cnfn convert_ulong2(char2);\n" |
| 28169 | "ulong2 __ovld __cnfn convert_ulong2_sat(char2);\n" |
| 28170 | "ulong2 __ovld __cnfn convert_ulong2_rte(uchar2);\n" |
| 28171 | "ulong2 __ovld __cnfn convert_ulong2_sat_rte(uchar2);\n" |
| 28172 | "ulong2 __ovld __cnfn convert_ulong2_rtz(uchar2);\n" |
| 28173 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtz(uchar2);\n" |
| 28174 | "ulong2 __ovld __cnfn convert_ulong2_rtp(uchar2);\n" |
| 28175 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtp(uchar2);\n" |
| 28176 | "ulong2 __ovld __cnfn convert_ulong2_rtn(uchar2);\n" |
| 28177 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtn(uchar2);\n" |
| 28178 | "ulong2 __ovld __cnfn convert_ulong2(uchar2);\n" |
| 28179 | "ulong2 __ovld __cnfn convert_ulong2_sat(uchar2);\n" |
| 28180 | "ulong2 __ovld __cnfn convert_ulong2_rte(short2);\n" |
| 28181 | "ulong2 __ovld __cnfn convert_ulong2_sat_rte(short2);\n" |
| 28182 | "ulong2 __ovld __cnfn convert_ulong2_rtz(short2);\n" |
| 28183 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtz(short2);\n" |
| 28184 | "ulong2 __ovld __cnfn convert_ulong2_rtp(short2);\n" |
| 28185 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtp(short2);\n" |
| 28186 | "ulong2 __ovld __cnfn convert_ulong2_rtn(short2);\n" |
| 28187 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtn(short2);\n" |
| 28188 | "ulong2 __ovld __cnfn convert_ulong2(short2);\n" |
| 28189 | "ulong2 __ovld __cnfn convert_ulong2_sat(short2);\n" |
| 28190 | "ulong2 __ovld __cnfn convert_ulong2_rte(ushort2);\n" |
| 28191 | "ulong2 __ovld __cnfn convert_ulong2_sat_rte(ushort2);\n" |
| 28192 | "ulong2 __ovld __cnfn convert_ulong2_rtz(ushort2);\n" |
| 28193 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtz(ushort2);\n" |
| 28194 | "ulong2 __ovld __cnfn convert_ulong2_rtp(ushort2);\n" |
| 28195 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtp(ushort2);\n" |
| 28196 | "ulong2 __ovld __cnfn convert_ulong2_rtn(ushort2);\n" |
| 28197 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtn(ushort2);\n" |
| 28198 | "ulong2 __ovld __cnfn convert_ulong2(ushort2);\n" |
| 28199 | "ulong2 __ovld __cnfn convert_ulong2_sat(ushort2);\n" |
| 28200 | "ulong2 __ovld __cnfn convert_ulong2_rte(int2);\n" |
| 28201 | "ulong2 __ovld __cnfn convert_ulong2_sat_rte(int2);\n" |
| 28202 | "ulong2 __ovld __cnfn convert_ulong2_rtz(int2);\n" |
| 28203 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtz(int2);\n" |
| 28204 | "ulong2 __ovld __cnfn convert_ulong2_rtp(int2);\n" |
| 28205 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtp(int2);\n" |
| 28206 | "ulong2 __ovld __cnfn convert_ulong2_rtn(int2);\n" |
| 28207 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtn(int2);\n" |
| 28208 | "ulong2 __ovld __cnfn convert_ulong2(int2);\n" |
| 28209 | "ulong2 __ovld __cnfn convert_ulong2_sat(int2);\n" |
| 28210 | "ulong2 __ovld __cnfn convert_ulong2_rte(uint2);\n" |
| 28211 | "ulong2 __ovld __cnfn convert_ulong2_sat_rte(uint2);\n" |
| 28212 | "ulong2 __ovld __cnfn convert_ulong2_rtz(uint2);\n" |
| 28213 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtz(uint2);\n" |
| 28214 | "ulong2 __ovld __cnfn convert_ulong2_rtp(uint2);\n" |
| 28215 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtp(uint2);\n" |
| 28216 | "ulong2 __ovld __cnfn convert_ulong2_rtn(uint2);\n" |
| 28217 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtn(uint2);\n" |
| 28218 | "ulong2 __ovld __cnfn convert_ulong2(uint2);\n" |
| 28219 | "ulong2 __ovld __cnfn convert_ulong2_sat(uint2);\n" |
| 28220 | "ulong2 __ovld __cnfn convert_ulong2_rte(long2);\n" |
| 28221 | "ulong2 __ovld __cnfn convert_ulong2_sat_rte(long2);\n" |
| 28222 | "ulong2 __ovld __cnfn convert_ulong2_rtz(long2);\n" |
| 28223 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtz(long2);\n" |
| 28224 | "ulong2 __ovld __cnfn convert_ulong2_rtp(long2);\n" |
| 28225 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtp(long2);\n" |
| 28226 | "ulong2 __ovld __cnfn convert_ulong2_rtn(long2);\n" |
| 28227 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtn(long2);\n" |
| 28228 | "ulong2 __ovld __cnfn convert_ulong2(long2);\n" |
| 28229 | "ulong2 __ovld __cnfn convert_ulong2_sat(long2);\n" |
| 28230 | "ulong2 __ovld __cnfn convert_ulong2_rte(ulong2);\n" |
| 28231 | "ulong2 __ovld __cnfn convert_ulong2_sat_rte(ulong2);\n" |
| 28232 | "ulong2 __ovld __cnfn convert_ulong2_rtz(ulong2);\n" |
| 28233 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtz(ulong2);\n" |
| 28234 | "ulong2 __ovld __cnfn convert_ulong2_rtp(ulong2);\n" |
| 28235 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtp(ulong2);\n" |
| 28236 | "ulong2 __ovld __cnfn convert_ulong2_rtn(ulong2);\n" |
| 28237 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtn(ulong2);\n" |
| 28238 | "ulong2 __ovld __cnfn convert_ulong2(ulong2);\n" |
| 28239 | "ulong2 __ovld __cnfn convert_ulong2_sat(ulong2);\n" |
| 28240 | "ulong2 __ovld __cnfn convert_ulong2_rte(float2);\n" |
| 28241 | "ulong2 __ovld __cnfn convert_ulong2_sat_rte(float2);\n" |
| 28242 | "ulong2 __ovld __cnfn convert_ulong2_rtz(float2);\n" |
| 28243 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtz(float2);\n" |
| 28244 | "ulong2 __ovld __cnfn convert_ulong2_rtp(float2);\n" |
| 28245 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtp(float2);\n" |
| 28246 | "ulong2 __ovld __cnfn convert_ulong2_rtn(float2);\n" |
| 28247 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtn(float2);\n" |
| 28248 | "ulong2 __ovld __cnfn convert_ulong2(float2);\n" |
| 28249 | "ulong2 __ovld __cnfn convert_ulong2_sat(float2);\n" |
| 28250 | "float2 __ovld __cnfn convert_float2_rte(char2);\n" |
| 28251 | "float2 __ovld __cnfn convert_float2_rtz(char2);\n" |
| 28252 | "float2 __ovld __cnfn convert_float2_rtp(char2);\n" |
| 28253 | "float2 __ovld __cnfn convert_float2_rtn(char2);\n" |
| 28254 | "float2 __ovld __cnfn convert_float2(char2);\n" |
| 28255 | "float2 __ovld __cnfn convert_float2_rte(uchar2);\n" |
| 28256 | "float2 __ovld __cnfn convert_float2_rtz(uchar2);\n" |
| 28257 | "float2 __ovld __cnfn convert_float2_rtp(uchar2);\n" |
| 28258 | "float2 __ovld __cnfn convert_float2_rtn(uchar2);\n" |
| 28259 | "float2 __ovld __cnfn convert_float2(uchar2);\n" |
| 28260 | "float2 __ovld __cnfn convert_float2_rte(short2);\n" |
| 28261 | "float2 __ovld __cnfn convert_float2_rtz(short2);\n" |
| 28262 | "float2 __ovld __cnfn convert_float2_rtp(short2);\n" |
| 28263 | "float2 __ovld __cnfn convert_float2_rtn(short2);\n" |
| 28264 | "float2 __ovld __cnfn convert_float2(short2);\n" |
| 28265 | "float2 __ovld __cnfn convert_float2_rte(ushort2);\n" |
| 28266 | "float2 __ovld __cnfn convert_float2_rtz(ushort2);\n" |
| 28267 | "float2 __ovld __cnfn convert_float2_rtp(ushort2);\n" |
| 28268 | "float2 __ovld __cnfn convert_float2_rtn(ushort2);\n" |
| 28269 | "float2 __ovld __cnfn convert_float2(ushort2);\n" |
| 28270 | "float2 __ovld __cnfn convert_float2_rte(int2);\n" |
| 28271 | "float2 __ovld __cnfn convert_float2_rtz(int2);\n" |
| 28272 | "float2 __ovld __cnfn convert_float2_rtp(int2);\n" |
| 28273 | "float2 __ovld __cnfn convert_float2_rtn(int2);\n" |
| 28274 | "float2 __ovld __cnfn convert_float2(int2);\n" |
| 28275 | "float2 __ovld __cnfn convert_float2_rte(uint2);\n" |
| 28276 | "float2 __ovld __cnfn convert_float2_rtz(uint2);\n" |
| 28277 | "float2 __ovld __cnfn convert_float2_rtp(uint2);\n" |
| 28278 | "float2 __ovld __cnfn convert_float2_rtn(uint2);\n" |
| 28279 | "float2 __ovld __cnfn convert_float2(uint2);\n" |
| 28280 | "float2 __ovld __cnfn convert_float2_rte(long2);\n" |
| 28281 | "float2 __ovld __cnfn convert_float2_rtz(long2);\n" |
| 28282 | "float2 __ovld __cnfn convert_float2_rtp(long2);\n" |
| 28283 | "float2 __ovld __cnfn convert_float2_rtn(long2);\n" |
| 28284 | "float2 __ovld __cnfn convert_float2(long2);\n" |
| 28285 | "float2 __ovld __cnfn convert_float2_rte(ulong2);\n" |
| 28286 | "float2 __ovld __cnfn convert_float2_rtz(ulong2);\n" |
| 28287 | "float2 __ovld __cnfn convert_float2_rtp(ulong2);\n" |
| 28288 | "float2 __ovld __cnfn convert_float2_rtn(ulong2);\n" |
| 28289 | "float2 __ovld __cnfn convert_float2(ulong2);\n" |
| 28290 | "float2 __ovld __cnfn convert_float2_rte(float2);\n" |
| 28291 | "float2 __ovld __cnfn convert_float2_rtz(float2);\n" |
| 28292 | "float2 __ovld __cnfn convert_float2_rtp(float2);\n" |
| 28293 | "float2 __ovld __cnfn convert_float2_rtn(float2);\n" |
| 28294 | "float2 __ovld __cnfn convert_float2(float2);\n" |
| 28295 | "char3 __ovld __cnfn convert_char3_rte(char3);\n" |
| 28296 | "char3 __ovld __cnfn convert_char3_sat_rte(char3);\n" |
| 28297 | "char3 __ovld __cnfn convert_char3_rtz(char3);\n" |
| 28298 | "char3 __ovld __cnfn convert_char3_sat_rtz(char3);\n" |
| 28299 | "char3 __ovld __cnfn convert_char3_rtp(char3);\n" |
| 28300 | "char3 __ovld __cnfn convert_char3_sat_rtp(char3);\n" |
| 28301 | "char3 __ovld __cnfn convert_char3_rtn(char3);\n" |
| 28302 | "char3 __ovld __cnfn convert_char3_sat_rtn(char3);\n" |
| 28303 | "char3 __ovld __cnfn convert_char3(char3);\n" |
| 28304 | "char3 __ovld __cnfn convert_char3_sat(char3);\n" |
| 28305 | "char3 __ovld __cnfn convert_char3_rte(uchar3);\n" |
| 28306 | "char3 __ovld __cnfn convert_char3_sat_rte(uchar3);\n" |
| 28307 | "char3 __ovld __cnfn convert_char3_rtz(uchar3);\n" |
| 28308 | "char3 __ovld __cnfn convert_char3_sat_rtz(uchar3);\n" |
| 28309 | "char3 __ovld __cnfn convert_char3_rtp(uchar3);\n" |
| 28310 | "char3 __ovld __cnfn convert_char3_sat_rtp(uchar3);\n" |
| 28311 | "char3 __ovld __cnfn convert_char3_rtn(uchar3);\n" |
| 28312 | "char3 __ovld __cnfn convert_char3_sat_rtn(uchar3);\n" |
| 28313 | "char3 __ovld __cnfn convert_char3(uchar3);\n" |
| 28314 | "char3 __ovld __cnfn convert_char3_sat(uchar3);\n" |
| 28315 | "char3 __ovld __cnfn convert_char3_rte(short3);\n" |
| 28316 | "char3 __ovld __cnfn convert_char3_sat_rte(short3);\n" |
| 28317 | "char3 __ovld __cnfn convert_char3_rtz(short3);\n" |
| 28318 | "char3 __ovld __cnfn convert_char3_sat_rtz(short3);\n" |
| 28319 | "char3 __ovld __cnfn convert_char3_rtp(short3);\n" |
| 28320 | "char3 __ovld __cnfn convert_char3_sat_rtp(short3);\n" |
| 28321 | "char3 __ovld __cnfn convert_char3_rtn(short3);\n" |
| 28322 | "char3 __ovld __cnfn convert_char3_sat_rtn(short3);\n" |
| 28323 | "char3 __ovld __cnfn convert_char3(short3);\n" |
| 28324 | "char3 __ovld __cnfn convert_char3_sat(short3);\n" |
| 28325 | "char3 __ovld __cnfn convert_char3_rte(ushort3);\n" |
| 28326 | "char3 __ovld __cnfn convert_char3_sat_rte(ushort3);\n" |
| 28327 | "char3 __ovld __cnfn convert_char3_rtz(ushort3);\n" |
| 28328 | "char3 __ovld __cnfn convert_char3_sat_rtz(ushort3);\n" |
| 28329 | "char3 __ovld __cnfn convert_char3_rtp(ushort3);\n" |
| 28330 | "char3 __ovld __cnfn convert_char3_sat_rtp(ushort3);\n" |
| 28331 | "char3 __ovld __cnfn convert_char3_rtn(ushort3);\n" |
| 28332 | "char3 __ovld __cnfn convert_char3_sat_rtn(ushort3);\n" |
| 28333 | "char3 __ovld __cnfn convert_char3(ushort3);\n" |
| 28334 | "char3 __ovld __cnfn convert_char3_sat(ushort3);\n" |
| 28335 | "char3 __ovld __cnfn convert_char3_rte(int3);\n" |
| 28336 | "char3 __ovld __cnfn convert_char3_sat_rte(int3);\n" |
| 28337 | "char3 __ovld __cnfn convert_char3_rtz(int3);\n" |
| 28338 | "char3 __ovld __cnfn convert_char3_sat_rtz(int3);\n" |
| 28339 | "char3 __ovld __cnfn convert_char3_rtp(int3);\n" |
| 28340 | "char3 __ovld __cnfn convert_char3_sat_rtp(int3);\n" |
| 28341 | "char3 __ovld __cnfn convert_char3_rtn(int3);\n" |
| 28342 | "char3 __ovld __cnfn convert_char3_sat_rtn(int3);\n" |
| 28343 | "char3 __ovld __cnfn convert_char3(int3);\n" |
| 28344 | "char3 __ovld __cnfn convert_char3_sat(int3);\n" |
| 28345 | "char3 __ovld __cnfn convert_char3_rte(uint3);\n" |
| 28346 | "char3 __ovld __cnfn convert_char3_sat_rte(uint3);\n" |
| 28347 | "char3 __ovld __cnfn convert_char3_rtz(uint3);\n" |
| 28348 | "char3 __ovld __cnfn convert_char3_sat_rtz(uint3);\n" |
| 28349 | "char3 __ovld __cnfn convert_char3_rtp(uint3);\n" |
| 28350 | "char3 __ovld __cnfn convert_char3_sat_rtp(uint3);\n" |
| 28351 | "char3 __ovld __cnfn convert_char3_rtn(uint3);\n" |
| 28352 | "char3 __ovld __cnfn convert_char3_sat_rtn(uint3);\n" |
| 28353 | "char3 __ovld __cnfn convert_char3(uint3);\n" |
| 28354 | "char3 __ovld __cnfn convert_char3_sat(uint3);\n" |
| 28355 | "char3 __ovld __cnfn convert_char3_rte(long3);\n" |
| 28356 | "char3 __ovld __cnfn convert_char3_sat_rte(long3);\n" |
| 28357 | "char3 __ovld __cnfn convert_char3_rtz(long3);\n" |
| 28358 | "char3 __ovld __cnfn convert_char3_sat_rtz(long3);\n" |
| 28359 | "char3 __ovld __cnfn convert_char3_rtp(long3);\n" |
| 28360 | "char3 __ovld __cnfn convert_char3_sat_rtp(long3);\n" |
| 28361 | "char3 __ovld __cnfn convert_char3_rtn(long3);\n" |
| 28362 | "char3 __ovld __cnfn convert_char3_sat_rtn(long3);\n" |
| 28363 | "char3 __ovld __cnfn convert_char3(long3);\n" |
| 28364 | "char3 __ovld __cnfn convert_char3_sat(long3);\n" |
| 28365 | "char3 __ovld __cnfn convert_char3_rte(ulong3);\n" |
| 28366 | "char3 __ovld __cnfn convert_char3_sat_rte(ulong3);\n" |
| 28367 | "char3 __ovld __cnfn convert_char3_rtz(ulong3);\n" |
| 28368 | "char3 __ovld __cnfn convert_char3_sat_rtz(ulong3);\n" |
| 28369 | "char3 __ovld __cnfn convert_char3_rtp(ulong3);\n" |
| 28370 | "char3 __ovld __cnfn convert_char3_sat_rtp(ulong3);\n" |
| 28371 | "char3 __ovld __cnfn convert_char3_rtn(ulong3);\n" |
| 28372 | "char3 __ovld __cnfn convert_char3_sat_rtn(ulong3);\n" |
| 28373 | "char3 __ovld __cnfn convert_char3(ulong3);\n" |
| 28374 | "char3 __ovld __cnfn convert_char3_sat(ulong3);\n" |
| 28375 | "char3 __ovld __cnfn convert_char3_rte(float3);\n" |
| 28376 | "char3 __ovld __cnfn convert_char3_sat_rte(float3);\n" |
| 28377 | "char3 __ovld __cnfn convert_char3_rtz(float3);\n" |
| 28378 | "char3 __ovld __cnfn convert_char3_sat_rtz(float3);\n" |
| 28379 | "char3 __ovld __cnfn convert_char3_rtp(float3);\n" |
| 28380 | "char3 __ovld __cnfn convert_char3_sat_rtp(float3);\n" |
| 28381 | "char3 __ovld __cnfn convert_char3_rtn(float3);\n" |
| 28382 | "char3 __ovld __cnfn convert_char3_sat_rtn(float3);\n" |
| 28383 | "char3 __ovld __cnfn convert_char3(float3);\n" |
| 28384 | "char3 __ovld __cnfn convert_char3_sat(float3);\n" |
| 28385 | "uchar3 __ovld __cnfn convert_uchar3_rte(char3);\n" |
| 28386 | "uchar3 __ovld __cnfn convert_uchar3_sat_rte(char3);\n" |
| 28387 | "uchar3 __ovld __cnfn convert_uchar3_rtz(char3);\n" |
| 28388 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtz(char3);\n" |
| 28389 | "uchar3 __ovld __cnfn convert_uchar3_rtp(char3);\n" |
| 28390 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtp(char3);\n" |
| 28391 | "uchar3 __ovld __cnfn convert_uchar3_rtn(char3);\n" |
| 28392 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtn(char3);\n" |
| 28393 | "uchar3 __ovld __cnfn convert_uchar3(char3);\n" |
| 28394 | "uchar3 __ovld __cnfn convert_uchar3_sat(char3);\n" |
| 28395 | "uchar3 __ovld __cnfn convert_uchar3_rte(uchar3);\n" |
| 28396 | "uchar3 __ovld __cnfn convert_uchar3_sat_rte(uchar3);\n" |
| 28397 | "uchar3 __ovld __cnfn convert_uchar3_rtz(uchar3);\n" |
| 28398 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtz(uchar3);\n" |
| 28399 | "uchar3 __ovld __cnfn convert_uchar3_rtp(uchar3);\n" |
| 28400 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtp(uchar3);\n" |
| 28401 | "uchar3 __ovld __cnfn convert_uchar3_rtn(uchar3);\n" |
| 28402 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtn(uchar3);\n" |
| 28403 | "uchar3 __ovld __cnfn convert_uchar3(uchar3);\n" |
| 28404 | "uchar3 __ovld __cnfn convert_uchar3_sat(uchar3);\n" |
| 28405 | "uchar3 __ovld __cnfn convert_uchar3_rte(short3);\n" |
| 28406 | "uchar3 __ovld __cnfn convert_uchar3_sat_rte(short3);\n" |
| 28407 | "uchar3 __ovld __cnfn convert_uchar3_rtz(short3);\n" |
| 28408 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtz(short3);\n" |
| 28409 | "uchar3 __ovld __cnfn convert_uchar3_rtp(short3);\n" |
| 28410 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtp(short3);\n" |
| 28411 | "uchar3 __ovld __cnfn convert_uchar3_rtn(short3);\n" |
| 28412 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtn(short3);\n" |
| 28413 | "uchar3 __ovld __cnfn convert_uchar3(short3);\n" |
| 28414 | "uchar3 __ovld __cnfn convert_uchar3_sat(short3);\n" |
| 28415 | "uchar3 __ovld __cnfn convert_uchar3_rte(ushort3);\n" |
| 28416 | "uchar3 __ovld __cnfn convert_uchar3_sat_rte(ushort3);\n" |
| 28417 | "uchar3 __ovld __cnfn convert_uchar3_rtz(ushort3);\n" |
| 28418 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtz(ushort3);\n" |
| 28419 | "uchar3 __ovld __cnfn convert_uchar3_rtp(ushort3);\n" |
| 28420 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtp(ushort3);\n" |
| 28421 | "uchar3 __ovld __cnfn convert_uchar3_rtn(ushort3);\n" |
| 28422 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtn(ushort3);\n" |
| 28423 | "uchar3 __ovld __cnfn convert_uchar3(ushort3);\n" |
| 28424 | "uchar3 __ovld __cnfn convert_uchar3_sat(ushort3);\n" |
| 28425 | "uchar3 __ovld __cnfn convert_uchar3_rte(int3);\n" |
| 28426 | "uchar3 __ovld __cnfn convert_uchar3_sat_rte(int3);\n" |
| 28427 | "uchar3 __ovld __cnfn convert_uchar3_rtz(int3);\n" |
| 28428 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtz(int3);\n" |
| 28429 | "uchar3 __ovld __cnfn convert_uchar3_rtp(int3);\n" |
| 28430 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtp(int3);\n" |
| 28431 | "uchar3 __ovld __cnfn convert_uchar3_rtn(int3);\n" |
| 28432 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtn(int3);\n" |
| 28433 | "uchar3 __ovld __cnfn convert_uchar3(int3);\n" |
| 28434 | "uchar3 __ovld __cnfn convert_uchar3_sat(int3);\n" |
| 28435 | "uchar3 __ovld __cnfn convert_uchar3_rte(uint3);\n" |
| 28436 | "uchar3 __ovld __cnfn convert_uchar3_sat_rte(uint3);\n" |
| 28437 | "uchar3 __ovld __cnfn convert_uchar3_rtz(uint3);\n" |
| 28438 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtz(uint3);\n" |
| 28439 | "uchar3 __ovld __cnfn convert_uchar3_rtp(uint3);\n" |
| 28440 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtp(uint3);\n" |
| 28441 | "uchar3 __ovld __cnfn convert_uchar3_rtn(uint3);\n" |
| 28442 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtn(uint3);\n" |
| 28443 | "uchar3 __ovld __cnfn convert_uchar3(uint3);\n" |
| 28444 | "uchar3 __ovld __cnfn convert_uchar3_sat(uint3);\n" |
| 28445 | "uchar3 __ovld __cnfn convert_uchar3_rte(long3);\n" |
| 28446 | "uchar3 __ovld __cnfn convert_uchar3_sat_rte(long3);\n" |
| 28447 | "uchar3 __ovld __cnfn convert_uchar3_rtz(long3);\n" |
| 28448 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtz(long3);\n" |
| 28449 | "uchar3 __ovld __cnfn convert_uchar3_rtp(long3);\n" |
| 28450 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtp(long3);\n" |
| 28451 | "uchar3 __ovld __cnfn convert_uchar3_rtn(long3);\n" |
| 28452 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtn(long3);\n" |
| 28453 | "uchar3 __ovld __cnfn convert_uchar3(long3);\n" |
| 28454 | "uchar3 __ovld __cnfn convert_uchar3_sat(long3);\n" |
| 28455 | "uchar3 __ovld __cnfn convert_uchar3_rte(ulong3);\n" |
| 28456 | "uchar3 __ovld __cnfn convert_uchar3_sat_rte(ulong3);\n" |
| 28457 | "uchar3 __ovld __cnfn convert_uchar3_rtz(ulong3);\n" |
| 28458 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtz(ulong3);\n" |
| 28459 | "uchar3 __ovld __cnfn convert_uchar3_rtp(ulong3);\n" |
| 28460 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtp(ulong3);\n" |
| 28461 | "uchar3 __ovld __cnfn convert_uchar3_rtn(ulong3);\n" |
| 28462 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtn(ulong3);\n" |
| 28463 | "uchar3 __ovld __cnfn convert_uchar3(ulong3);\n" |
| 28464 | "uchar3 __ovld __cnfn convert_uchar3_sat(ulong3);\n" |
| 28465 | "uchar3 __ovld __cnfn convert_uchar3_rte(float3);\n" |
| 28466 | "uchar3 __ovld __cnfn convert_uchar3_sat_rte(float3);\n" |
| 28467 | "uchar3 __ovld __cnfn convert_uchar3_rtz(float3);\n" |
| 28468 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtz(float3);\n" |
| 28469 | "uchar3 __ovld __cnfn convert_uchar3_rtp(float3);\n" |
| 28470 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtp(float3);\n" |
| 28471 | "uchar3 __ovld __cnfn convert_uchar3_rtn(float3);\n" |
| 28472 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtn(float3);\n" |
| 28473 | "uchar3 __ovld __cnfn convert_uchar3(float3);\n" |
| 28474 | "uchar3 __ovld __cnfn convert_uchar3_sat(float3);\n" |
| 28475 | "short3 __ovld __cnfn convert_short3_rte(char3);\n" |
| 28476 | "short3 __ovld __cnfn convert_short3_sat_rte(char3);\n" |
| 28477 | "short3 __ovld __cnfn convert_short3_rtz(char3);\n" |
| 28478 | "short3 __ovld __cnfn convert_short3_sat_rtz(char3);\n" |
| 28479 | "short3 __ovld __cnfn convert_short3_rtp(char3);\n" |
| 28480 | "short3 __ovld __cnfn convert_short3_sat_rtp(char3);\n" |
| 28481 | "short3 __ovld __cnfn convert_short3_rtn(char3);\n" |
| 28482 | "short3 __ovld __cnfn convert_short3_sat_rtn(char3);\n" |
| 28483 | "short3 __ovld __cnfn convert_short3(char3);\n" |
| 28484 | "short3 __ovld __cnfn convert_short3_sat(char3);\n" |
| 28485 | "short3 __ovld __cnfn convert_short3_rte(uchar3);\n" |
| 28486 | "short3 __ovld __cnfn convert_short3_sat_rte(uchar3);\n" |
| 28487 | "short3 __ovld __cnfn convert_short3_rtz(uchar3);\n" |
| 28488 | "short3 __ovld __cnfn convert_short3_sat_rtz(uchar3);\n" |
| 28489 | "short3 __ovld __cnfn convert_short3_rtp(uchar3);\n" |
| 28490 | "short3 __ovld __cnfn convert_short3_sat_rtp(uchar3);\n" |
| 28491 | "short3 __ovld __cnfn convert_short3_rtn(uchar3);\n" |
| 28492 | "short3 __ovld __cnfn convert_short3_sat_rtn(uchar3);\n" |
| 28493 | "short3 __ovld __cnfn convert_short3(uchar3);\n" |
| 28494 | "short3 __ovld __cnfn convert_short3_sat(uchar3);\n" |
| 28495 | "short3 __ovld __cnfn convert_short3_rte(short3);\n" |
| 28496 | "short3 __ovld __cnfn convert_short3_sat_rte(short3);\n" |
| 28497 | "short3 __ovld __cnfn convert_short3_rtz(short3);\n" |
| 28498 | "short3 __ovld __cnfn convert_short3_sat_rtz(short3);\n" |
| 28499 | "short3 __ovld __cnfn convert_short3_rtp(short3);\n" |
| 28500 | "short3 __ovld __cnfn convert_short3_sat_rtp(short3);\n" |
| 28501 | "short3 __ovld __cnfn convert_short3_rtn(short3);\n" |
| 28502 | "short3 __ovld __cnfn convert_short3_sat_rtn(short3);\n" |
| 28503 | "short3 __ovld __cnfn convert_short3(short3);\n" |
| 28504 | "short3 __ovld __cnfn convert_short3_sat(short3);\n" |
| 28505 | "short3 __ovld __cnfn convert_short3_rte(ushort3);\n" |
| 28506 | "short3 __ovld __cnfn convert_short3_sat_rte(ushort3);\n" |
| 28507 | "short3 __ovld __cnfn convert_short3_rtz(ushort3);\n" |
| 28508 | "short3 __ovld __cnfn convert_short3_sat_rtz(ushort3);\n" |
| 28509 | "short3 __ovld __cnfn convert_short3_rtp(ushort3);\n" |
| 28510 | "short3 __ovld __cnfn convert_short3_sat_rtp(ushort3);\n" |
| 28511 | "short3 __ovld __cnfn convert_short3_rtn(ushort3);\n" |
| 28512 | "short3 __ovld __cnfn convert_short3_sat_rtn(ushort3);\n" |
| 28513 | "short3 __ovld __cnfn convert_short3(ushort3);\n" |
| 28514 | "short3 __ovld __cnfn convert_short3_sat(ushort3);\n" |
| 28515 | "short3 __ovld __cnfn convert_short3_rte(int3);\n" |
| 28516 | "short3 __ovld __cnfn convert_short3_sat_rte(int3);\n" |
| 28517 | "short3 __ovld __cnfn convert_short3_rtz(int3);\n" |
| 28518 | "short3 __ovld __cnfn convert_short3_sat_rtz(int3);\n" |
| 28519 | "short3 __ovld __cnfn convert_short3_rtp(int3);\n" |
| 28520 | "short3 __ovld __cnfn convert_short3_sat_rtp(int3);\n" |
| 28521 | "short3 __ovld __cnfn convert_short3_rtn(int3);\n" |
| 28522 | "short3 __ovld __cnfn convert_short3_sat_rtn(int3);\n" |
| 28523 | "short3 __ovld __cnfn convert_short3(int3);\n" |
| 28524 | "short3 __ovld __cnfn convert_short3_sat(int3);\n" |
| 28525 | "short3 __ovld __cnfn convert_short3_rte(uint3);\n" |
| 28526 | "short3 __ovld __cnfn convert_short3_sat_rte(uint3);\n" |
| 28527 | "short3 __ovld __cnfn convert_short3_rtz(uint3);\n" |
| 28528 | "short3 __ovld __cnfn convert_short3_sat_rtz(uint3);\n" |
| 28529 | "short3 __ovld __cnfn convert_short3_rtp(uint3);\n" |
| 28530 | "short3 __ovld __cnfn convert_short3_sat_rtp(uint3);\n" |
| 28531 | "short3 __ovld __cnfn convert_short3_rtn(uint3);\n" |
| 28532 | "short3 __ovld __cnfn convert_short3_sat_rtn(uint3);\n" |
| 28533 | "short3 __ovld __cnfn convert_short3(uint3);\n" |
| 28534 | "short3 __ovld __cnfn convert_short3_sat(uint3);\n" |
| 28535 | "short3 __ovld __cnfn convert_short3_rte(long3);\n" |
| 28536 | "short3 __ovld __cnfn convert_short3_sat_rte(long3);\n" |
| 28537 | "short3 __ovld __cnfn convert_short3_rtz(long3);\n" |
| 28538 | "short3 __ovld __cnfn convert_short3_sat_rtz(long3);\n" |
| 28539 | "short3 __ovld __cnfn convert_short3_rtp(long3);\n" |
| 28540 | "short3 __ovld __cnfn convert_short3_sat_rtp(long3);\n" |
| 28541 | "short3 __ovld __cnfn convert_short3_rtn(long3);\n" |
| 28542 | "short3 __ovld __cnfn convert_short3_sat_rtn(long3);\n" |
| 28543 | "short3 __ovld __cnfn convert_short3(long3);\n" |
| 28544 | "short3 __ovld __cnfn convert_short3_sat(long3);\n" |
| 28545 | "short3 __ovld __cnfn convert_short3_rte(ulong3);\n" |
| 28546 | "short3 __ovld __cnfn convert_short3_sat_rte(ulong3);\n" |
| 28547 | "short3 __ovld __cnfn convert_short3_rtz(ulong3);\n" |
| 28548 | "short3 __ovld __cnfn convert_short3_sat_rtz(ulong3);\n" |
| 28549 | "short3 __ovld __cnfn convert_short3_rtp(ulong3);\n" |
| 28550 | "short3 __ovld __cnfn convert_short3_sat_rtp(ulong3);\n" |
| 28551 | "short3 __ovld __cnfn convert_short3_rtn(ulong3);\n" |
| 28552 | "short3 __ovld __cnfn convert_short3_sat_rtn(ulong3);\n" |
| 28553 | "short3 __ovld __cnfn convert_short3(ulong3);\n" |
| 28554 | "short3 __ovld __cnfn convert_short3_sat(ulong3);\n" |
| 28555 | "short3 __ovld __cnfn convert_short3_rte(float3);\n" |
| 28556 | "short3 __ovld __cnfn convert_short3_sat_rte(float3);\n" |
| 28557 | "short3 __ovld __cnfn convert_short3_rtz(float3);\n" |
| 28558 | "short3 __ovld __cnfn convert_short3_sat_rtz(float3);\n" |
| 28559 | "short3 __ovld __cnfn convert_short3_rtp(float3);\n" |
| 28560 | "short3 __ovld __cnfn convert_short3_sat_rtp(float3);\n" |
| 28561 | "short3 __ovld __cnfn convert_short3_rtn(float3);\n" |
| 28562 | "short3 __ovld __cnfn convert_short3_sat_rtn(float3);\n" |
| 28563 | "short3 __ovld __cnfn convert_short3(float3);\n" |
| 28564 | "short3 __ovld __cnfn convert_short3_sat(float3);\n" |
| 28565 | "ushort3 __ovld __cnfn convert_ushort3_rte(char3);\n" |
| 28566 | "ushort3 __ovld __cnfn convert_ushort3_sat_rte(char3);\n" |
| 28567 | "ushort3 __ovld __cnfn convert_ushort3_rtz(char3);\n" |
| 28568 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtz(char3);\n" |
| 28569 | "ushort3 __ovld __cnfn convert_ushort3_rtp(char3);\n" |
| 28570 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtp(char3);\n" |
| 28571 | "ushort3 __ovld __cnfn convert_ushort3_rtn(char3);\n" |
| 28572 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtn(char3);\n" |
| 28573 | "ushort3 __ovld __cnfn convert_ushort3(char3);\n" |
| 28574 | "ushort3 __ovld __cnfn convert_ushort3_sat(char3);\n" |
| 28575 | "ushort3 __ovld __cnfn convert_ushort3_rte(uchar3);\n" |
| 28576 | "ushort3 __ovld __cnfn convert_ushort3_sat_rte(uchar3);\n" |
| 28577 | "ushort3 __ovld __cnfn convert_ushort3_rtz(uchar3);\n" |
| 28578 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtz(uchar3);\n" |
| 28579 | "ushort3 __ovld __cnfn convert_ushort3_rtp(uchar3);\n" |
| 28580 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtp(uchar3);\n" |
| 28581 | "ushort3 __ovld __cnfn convert_ushort3_rtn(uchar3);\n" |
| 28582 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtn(uchar3);\n" |
| 28583 | "ushort3 __ovld __cnfn convert_ushort3(uchar3);\n" |
| 28584 | "ushort3 __ovld __cnfn convert_ushort3_sat(uchar3);\n" |
| 28585 | "ushort3 __ovld __cnfn convert_ushort3_rte(short3);\n" |
| 28586 | "ushort3 __ovld __cnfn convert_ushort3_sat_rte(short3);\n" |
| 28587 | "ushort3 __ovld __cnfn convert_ushort3_rtz(short3);\n" |
| 28588 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtz(short3);\n" |
| 28589 | "ushort3 __ovld __cnfn convert_ushort3_rtp(short3);\n" |
| 28590 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtp(short3);\n" |
| 28591 | "ushort3 __ovld __cnfn convert_ushort3_rtn(short3);\n" |
| 28592 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtn(short3);\n" |
| 28593 | "ushort3 __ovld __cnfn convert_ushort3(short3);\n" |
| 28594 | "ushort3 __ovld __cnfn convert_ushort3_sat(short3);\n" |
| 28595 | "ushort3 __ovld __cnfn convert_ushort3_rte(ushort3);\n" |
| 28596 | "ushort3 __ovld __cnfn convert_ushort3_sat_rte(ushort3);\n" |
| 28597 | "ushort3 __ovld __cnfn convert_ushort3_rtz(ushort3);\n" |
| 28598 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtz(ushort3);\n" |
| 28599 | "ushort3 __ovld __cnfn convert_ushort3_rtp(ushort3);\n" |
| 28600 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtp(ushort3);\n" |
| 28601 | "ushort3 __ovld __cnfn convert_ushort3_rtn(ushort3);\n" |
| 28602 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtn(ushort3);\n" |
| 28603 | "ushort3 __ovld __cnfn convert_ushort3(ushort3);\n" |
| 28604 | "ushort3 __ovld __cnfn convert_ushort3_sat(ushort3);\n" |
| 28605 | "ushort3 __ovld __cnfn convert_ushort3_rte(int3);\n" |
| 28606 | "ushort3 __ovld __cnfn convert_ushort3_sat_rte(int3);\n" |
| 28607 | "ushort3 __ovld __cnfn convert_ushort3_rtz(int3);\n" |
| 28608 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtz(int3);\n" |
| 28609 | "ushort3 __ovld __cnfn convert_ushort3_rtp(int3);\n" |
| 28610 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtp(int3);\n" |
| 28611 | "ushort3 __ovld __cnfn convert_ushort3_rtn(int3);\n" |
| 28612 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtn(int3);\n" |
| 28613 | "ushort3 __ovld __cnfn convert_ushort3(int3);\n" |
| 28614 | "ushort3 __ovld __cnfn convert_ushort3_sat(int3);\n" |
| 28615 | "ushort3 __ovld __cnfn convert_ushort3_rte(uint3);\n" |
| 28616 | "ushort3 __ovld __cnfn convert_ushort3_sat_rte(uint3);\n" |
| 28617 | "ushort3 __ovld __cnfn convert_ushort3_rtz(uint3);\n" |
| 28618 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtz(uint3);\n" |
| 28619 | "ushort3 __ovld __cnfn convert_ushort3_rtp(uint3);\n" |
| 28620 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtp(uint3);\n" |
| 28621 | "ushort3 __ovld __cnfn convert_ushort3_rtn(uint3);\n" |
| 28622 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtn(uint3);\n" |
| 28623 | "ushort3 __ovld __cnfn convert_ushort3(uint3);\n" |
| 28624 | "ushort3 __ovld __cnfn convert_ushort3_sat(uint3);\n" |
| 28625 | "ushort3 __ovld __cnfn convert_ushort3_rte(long3);\n" |
| 28626 | "ushort3 __ovld __cnfn convert_ushort3_sat_rte(long3);\n" |
| 28627 | "ushort3 __ovld __cnfn convert_ushort3_rtz(long3);\n" |
| 28628 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtz(long3);\n" |
| 28629 | "ushort3 __ovld __cnfn convert_ushort3_rtp(long3);\n" |
| 28630 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtp(long3);\n" |
| 28631 | "ushort3 __ovld __cnfn convert_ushort3_rtn(long3);\n" |
| 28632 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtn(long3);\n" |
| 28633 | "ushort3 __ovld __cnfn convert_ushort3(long3);\n" |
| 28634 | "ushort3 __ovld __cnfn convert_ushort3_sat(long3);\n" |
| 28635 | "ushort3 __ovld __cnfn convert_ushort3_rte(ulong3);\n" |
| 28636 | "ushort3 __ovld __cnfn convert_ushort3_sat_rte(ulong3);\n" |
| 28637 | "ushort3 __ovld __cnfn convert_ushort3_rtz(ulong3);\n" |
| 28638 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtz(ulong3);\n" |
| 28639 | "ushort3 __ovld __cnfn convert_ushort3_rtp(ulong3);\n" |
| 28640 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtp(ulong3);\n" |
| 28641 | "ushort3 __ovld __cnfn convert_ushort3_rtn(ulong3);\n" |
| 28642 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtn(ulong3);\n" |
| 28643 | "ushort3 __ovld __cnfn convert_ushort3(ulong3);\n" |
| 28644 | "ushort3 __ovld __cnfn convert_ushort3_sat(ulong3);\n" |
| 28645 | "ushort3 __ovld __cnfn convert_ushort3_rte(float3);\n" |
| 28646 | "ushort3 __ovld __cnfn convert_ushort3_sat_rte(float3);\n" |
| 28647 | "ushort3 __ovld __cnfn convert_ushort3_rtz(float3);\n" |
| 28648 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtz(float3);\n" |
| 28649 | "ushort3 __ovld __cnfn convert_ushort3_rtp(float3);\n" |
| 28650 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtp(float3);\n" |
| 28651 | "ushort3 __ovld __cnfn convert_ushort3_rtn(float3);\n" |
| 28652 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtn(float3);\n" |
| 28653 | "ushort3 __ovld __cnfn convert_ushort3(float3);\n" |
| 28654 | "ushort3 __ovld __cnfn convert_ushort3_sat(float3);\n" |
| 28655 | "int3 __ovld __cnfn convert_int3_rte(char3);\n" |
| 28656 | "int3 __ovld __cnfn convert_int3_sat_rte(char3);\n" |
| 28657 | "int3 __ovld __cnfn convert_int3_rtz(char3);\n" |
| 28658 | "int3 __ovld __cnfn convert_int3_sat_rtz(char3);\n" |
| 28659 | "int3 __ovld __cnfn convert_int3_rtp(char3);\n" |
| 28660 | "int3 __ovld __cnfn convert_int3_sat_rtp(char3);\n" |
| 28661 | "int3 __ovld __cnfn convert_int3_rtn(char3);\n" |
| 28662 | "int3 __ovld __cnfn convert_int3_sat_rtn(char3);\n" |
| 28663 | "int3 __ovld __cnfn convert_int3(char3);\n" |
| 28664 | "int3 __ovld __cnfn convert_int3_sat(char3);\n" |
| 28665 | "int3 __ovld __cnfn convert_int3_rte(uchar3);\n" |
| 28666 | "int3 __ovld __cnfn convert_int3_sat_rte(uchar3);\n" |
| 28667 | "int3 __ovld __cnfn convert_int3_rtz(uchar3);\n" |
| 28668 | "int3 __ovld __cnfn convert_int3_sat_rtz(uchar3);\n" |
| 28669 | "int3 __ovld __cnfn convert_int3_rtp(uchar3);\n" |
| 28670 | "int3 __ovld __cnfn convert_int3_sat_rtp(uchar3);\n" |
| 28671 | "int3 __ovld __cnfn convert_int3_rtn(uchar3);\n" |
| 28672 | "int3 __ovld __cnfn convert_int3_sat_rtn(uchar3);\n" |
| 28673 | "int3 __ovld __cnfn convert_int3(uchar3);\n" |
| 28674 | "int3 __ovld __cnfn convert_int3_sat(uchar3);\n" |
| 28675 | "int3 __ovld __cnfn convert_int3_rte(short3);\n" |
| 28676 | "int3 __ovld __cnfn convert_int3_sat_rte(short3);\n" |
| 28677 | "int3 __ovld __cnfn convert_int3_rtz(short3);\n" |
| 28678 | "int3 __ovld __cnfn convert_int3_sat_rtz(short3);\n" |
| 28679 | "int3 __ovld __cnfn convert_int3_rtp(short3);\n" |
| 28680 | "int3 __ovld __cnfn convert_int3_sat_rtp(short3);\n" |
| 28681 | "int3 __ovld __cnfn convert_int3_rtn(short3);\n" |
| 28682 | "int3 __ovld __cnfn convert_int3_sat_rtn(short3);\n" |
| 28683 | "int3 __ovld __cnfn convert_int3(short3);\n" |
| 28684 | "int3 __ovld __cnfn convert_int3_sat(short3);\n" |
| 28685 | "int3 __ovld __cnfn convert_int3_rte(ushort3);\n" |
| 28686 | "int3 __ovld __cnfn convert_int3_sat_rte(ushort3);\n" |
| 28687 | "int3 __ovld __cnfn convert_int3_rtz(ushort3);\n" |
| 28688 | "int3 __ovld __cnfn convert_int3_sat_rtz(ushort3);\n" |
| 28689 | "int3 __ovld __cnfn convert_int3_rtp(ushort3);\n" |
| 28690 | "int3 __ovld __cnfn convert_int3_sat_rtp(ushort3);\n" |
| 28691 | "int3 __ovld __cnfn convert_int3_rtn(ushort3);\n" |
| 28692 | "int3 __ovld __cnfn convert_int3_sat_rtn(ushort3);\n" |
| 28693 | "int3 __ovld __cnfn convert_int3(ushort3);\n" |
| 28694 | "int3 __ovld __cnfn convert_int3_sat(ushort3);\n" |
| 28695 | "int3 __ovld __cnfn convert_int3_rte(int3);\n" |
| 28696 | "int3 __ovld __cnfn convert_int3_sat_rte(int3);\n" |
| 28697 | "int3 __ovld __cnfn convert_int3_rtz(int3);\n" |
| 28698 | "int3 __ovld __cnfn convert_int3_sat_rtz(int3);\n" |
| 28699 | "int3 __ovld __cnfn convert_int3_rtp(int3);\n" |
| 28700 | "int3 __ovld __cnfn convert_int3_sat_rtp(int3);\n" |
| 28701 | "int3 __ovld __cnfn convert_int3_rtn(int3);\n" |
| 28702 | "int3 __ovld __cnfn convert_int3_sat_rtn(int3);\n" |
| 28703 | "int3 __ovld __cnfn convert_int3(int3);\n" |
| 28704 | "int3 __ovld __cnfn convert_int3_sat(int3);\n" |
| 28705 | "int3 __ovld __cnfn convert_int3_rte(uint3);\n" |
| 28706 | "int3 __ovld __cnfn convert_int3_sat_rte(uint3);\n" |
| 28707 | "int3 __ovld __cnfn convert_int3_rtz(uint3);\n" |
| 28708 | "int3 __ovld __cnfn convert_int3_sat_rtz(uint3);\n" |
| 28709 | "int3 __ovld __cnfn convert_int3_rtp(uint3);\n" |
| 28710 | "int3 __ovld __cnfn convert_int3_sat_rtp(uint3);\n" |
| 28711 | "int3 __ovld __cnfn convert_int3_rtn(uint3);\n" |
| 28712 | "int3 __ovld __cnfn convert_int3_sat_rtn(uint3);\n" |
| 28713 | "int3 __ovld __cnfn convert_int3(uint3);\n" |
| 28714 | "int3 __ovld __cnfn convert_int3_sat(uint3);\n" |
| 28715 | "int3 __ovld __cnfn convert_int3_rte(long3);\n" |
| 28716 | "int3 __ovld __cnfn convert_int3_sat_rte(long3);\n" |
| 28717 | "int3 __ovld __cnfn convert_int3_rtz(long3);\n" |
| 28718 | "int3 __ovld __cnfn convert_int3_sat_rtz(long3);\n" |
| 28719 | "int3 __ovld __cnfn convert_int3_rtp(long3);\n" |
| 28720 | "int3 __ovld __cnfn convert_int3_sat_rtp(long3);\n" |
| 28721 | "int3 __ovld __cnfn convert_int3_rtn(long3);\n" |
| 28722 | "int3 __ovld __cnfn convert_int3_sat_rtn(long3);\n" |
| 28723 | "int3 __ovld __cnfn convert_int3(long3);\n" |
| 28724 | "int3 __ovld __cnfn convert_int3_sat(long3);\n" |
| 28725 | "int3 __ovld __cnfn convert_int3_rte(ulong3);\n" |
| 28726 | "int3 __ovld __cnfn convert_int3_sat_rte(ulong3);\n" |
| 28727 | "int3 __ovld __cnfn convert_int3_rtz(ulong3);\n" |
| 28728 | "int3 __ovld __cnfn convert_int3_sat_rtz(ulong3);\n" |
| 28729 | "int3 __ovld __cnfn convert_int3_rtp(ulong3);\n" |
| 28730 | "int3 __ovld __cnfn convert_int3_sat_rtp(ulong3);\n" |
| 28731 | "int3 __ovld __cnfn convert_int3_rtn(ulong3);\n" |
| 28732 | "int3 __ovld __cnfn convert_int3_sat_rtn(ulong3);\n" |
| 28733 | "int3 __ovld __cnfn convert_int3(ulong3);\n" |
| 28734 | "int3 __ovld __cnfn convert_int3_sat(ulong3);\n" |
| 28735 | "int3 __ovld __cnfn convert_int3_rte(float3);\n" |
| 28736 | "int3 __ovld __cnfn convert_int3_sat_rte(float3);\n" |
| 28737 | "int3 __ovld __cnfn convert_int3_rtz(float3);\n" |
| 28738 | "int3 __ovld __cnfn convert_int3_sat_rtz(float3);\n" |
| 28739 | "int3 __ovld __cnfn convert_int3_rtp(float3);\n" |
| 28740 | "int3 __ovld __cnfn convert_int3_sat_rtp(float3);\n" |
| 28741 | "int3 __ovld __cnfn convert_int3_rtn(float3);\n" |
| 28742 | "int3 __ovld __cnfn convert_int3_sat_rtn(float3);\n" |
| 28743 | "int3 __ovld __cnfn convert_int3(float3);\n" |
| 28744 | "int3 __ovld __cnfn convert_int3_sat(float3);\n" |
| 28745 | "uint3 __ovld __cnfn convert_uint3_rte(char3);\n" |
| 28746 | "uint3 __ovld __cnfn convert_uint3_sat_rte(char3);\n" |
| 28747 | "uint3 __ovld __cnfn convert_uint3_rtz(char3);\n" |
| 28748 | "uint3 __ovld __cnfn convert_uint3_sat_rtz(char3);\n" |
| 28749 | "uint3 __ovld __cnfn convert_uint3_rtp(char3);\n" |
| 28750 | "uint3 __ovld __cnfn convert_uint3_sat_rtp(char3);\n" |
| 28751 | "uint3 __ovld __cnfn convert_uint3_rtn(char3);\n" |
| 28752 | "uint3 __ovld __cnfn convert_uint3_sat_rtn(char3);\n" |
| 28753 | "uint3 __ovld __cnfn convert_uint3(char3);\n" |
| 28754 | "uint3 __ovld __cnfn convert_uint3_sat(char3);\n" |
| 28755 | "uint3 __ovld __cnfn convert_uint3_rte(uchar3);\n" |
| 28756 | "uint3 __ovld __cnfn convert_uint3_sat_rte(uchar3);\n" |
| 28757 | "uint3 __ovld __cnfn convert_uint3_rtz(uchar3);\n" |
| 28758 | "uint3 __ovld __cnfn convert_uint3_sat_rtz(uchar3);\n" |
| 28759 | "uint3 __ovld __cnfn convert_uint3_rtp(uchar3);\n" |
| 28760 | "uint3 __ovld __cnfn convert_uint3_sat_rtp(uchar3);\n" |
| 28761 | "uint3 __ovld __cnfn convert_uint3_rtn(uchar3);\n" |
| 28762 | "uint3 __ovld __cnfn convert_uint3_sat_rtn(uchar3);\n" |
| 28763 | "uint3 __ovld __cnfn convert_uint3(uchar3);\n" |
| 28764 | "uint3 __ovld __cnfn convert_uint3_sat(uchar3);\n" |
| 28765 | "uint3 __ovld __cnfn convert_uint3_rte(short3);\n" |
| 28766 | "uint3 __ovld __cnfn convert_uint3_sat_rte(short3);\n" |
| 28767 | "uint3 __ovld __cnfn convert_uint3_rtz(short3);\n" |
| 28768 | "uint3 __ovld __cnfn convert_uint3_sat_rtz(short3);\n" |
| 28769 | "uint3 __ovld __cnfn convert_uint3_rtp(short3);\n" |
| 28770 | "uint3 __ovld __cnfn convert_uint3_sat_rtp(short3);\n" |
| 28771 | "uint3 __ovld __cnfn convert_uint3_rtn(short3);\n" |
| 28772 | "uint3 __ovld __cnfn convert_uint3_sat_rtn(short3);\n" |
| 28773 | "uint3 __ovld __cnfn convert_uint3(short3);\n" |
| 28774 | "uint3 __ovld __cnfn convert_uint3_sat(short3);\n" |
| 28775 | "uint3 __ovld __cnfn convert_uint3_rte(ushort3);\n" |
| 28776 | "uint3 __ovld __cnfn convert_uint3_sat_rte(ushort3);\n" |
| 28777 | "uint3 __ovld __cnfn convert_uint3_rtz(ushort3);\n" |
| 28778 | "uint3 __ovld __cnfn convert_uint3_sat_rtz(ushort3);\n" |
| 28779 | "uint3 __ovld __cnfn convert_uint3_rtp(ushort3);\n" |
| 28780 | "uint3 __ovld __cnfn convert_uint3_sat_rtp(ushort3);\n" |
| 28781 | "uint3 __ovld __cnfn convert_uint3_rtn(ushort3);\n" |
| 28782 | "uint3 __ovld __cnfn convert_uint3_sat_rtn(ushort3);\n" |
| 28783 | "uint3 __ovld __cnfn convert_uint3(ushort3);\n" |
| 28784 | "uint3 __ovld __cnfn convert_uint3_sat(ushort3);\n" |
| 28785 | "uint3 __ovld __cnfn convert_uint3_rte(int3);\n" |
| 28786 | "uint3 __ovld __cnfn convert_uint3_sat_rte(int3);\n" |
| 28787 | "uint3 __ovld __cnfn convert_uint3_rtz(int3);\n" |
| 28788 | "uint3 __ovld __cnfn convert_uint3_sat_rtz(int3);\n" |
| 28789 | "uint3 __ovld __cnfn convert_uint3_rtp(int3);\n" |
| 28790 | "uint3 __ovld __cnfn convert_uint3_sat_rtp(int3);\n" |
| 28791 | "uint3 __ovld __cnfn convert_uint3_rtn(int3);\n" |
| 28792 | "uint3 __ovld __cnfn convert_uint3_sat_rtn(int3);\n" |
| 28793 | "uint3 __ovld __cnfn convert_uint3(int3);\n" |
| 28794 | "uint3 __ovld __cnfn convert_uint3_sat(int3);\n" |
| 28795 | "uint3 __ovld __cnfn convert_uint3_rte(uint3);\n" |
| 28796 | "uint3 __ovld __cnfn convert_uint3_sat_rte(uint3);\n" |
| 28797 | "uint3 __ovld __cnfn convert_uint3_rtz(uint3);\n" |
| 28798 | "uint3 __ovld __cnfn convert_uint3_sat_rtz(uint3);\n" |
| 28799 | "uint3 __ovld __cnfn convert_uint3_rtp(uint3);\n" |
| 28800 | "uint3 __ovld __cnfn convert_uint3_sat_rtp(uint3);\n" |
| 28801 | "uint3 __ovld __cnfn convert_uint3_rtn(uint3);\n" |
| 28802 | "uint3 __ovld __cnfn convert_uint3_sat_rtn(uint3);\n" |
| 28803 | "uint3 __ovld __cnfn convert_uint3(uint3);\n" |
| 28804 | "uint3 __ovld __cnfn convert_uint3_sat(uint3);\n" |
| 28805 | "uint3 __ovld __cnfn convert_uint3_rte(long3);\n" |
| 28806 | "uint3 __ovld __cnfn convert_uint3_sat_rte(long3);\n" |
| 28807 | "uint3 __ovld __cnfn convert_uint3_rtz(long3);\n" |
| 28808 | "uint3 __ovld __cnfn convert_uint3_sat_rtz(long3);\n" |
| 28809 | "uint3 __ovld __cnfn convert_uint3_rtp(long3);\n" |
| 28810 | "uint3 __ovld __cnfn convert_uint3_sat_rtp(long3);\n" |
| 28811 | "uint3 __ovld __cnfn convert_uint3_rtn(long3);\n" |
| 28812 | "uint3 __ovld __cnfn convert_uint3_sat_rtn(long3);\n" |
| 28813 | "uint3 __ovld __cnfn convert_uint3(long3);\n" |
| 28814 | "uint3 __ovld __cnfn convert_uint3_sat(long3);\n" |
| 28815 | "uint3 __ovld __cnfn convert_uint3_rte(ulong3);\n" |
| 28816 | "uint3 __ovld __cnfn convert_uint3_sat_rte(ulong3);\n" |
| 28817 | "uint3 __ovld __cnfn convert_uint3_rtz(ulong3);\n" |
| 28818 | "uint3 __ovld __cnfn convert_uint3_sat_rtz(ulong3);\n" |
| 28819 | "uint3 __ovld __cnfn convert_uint3_rtp(ulong3);\n" |
| 28820 | "uint3 __ovld __cnfn convert_uint3_sat_rtp(ulong3);\n" |
| 28821 | "uint3 __ovld __cnfn convert_uint3_rtn(ulong3);\n" |
| 28822 | "uint3 __ovld __cnfn convert_uint3_sat_rtn(ulong3);\n" |
| 28823 | "uint3 __ovld __cnfn convert_uint3(ulong3);\n" |
| 28824 | "uint3 __ovld __cnfn convert_uint3_sat(ulong3);\n" |
| 28825 | "uint3 __ovld __cnfn convert_uint3_rte(float3);\n" |
| 28826 | "uint3 __ovld __cnfn convert_uint3_sat_rte(float3);\n" |
| 28827 | "uint3 __ovld __cnfn convert_uint3_rtz(float3);\n" |
| 28828 | "uint3 __ovld __cnfn convert_uint3_sat_rtz(float3);\n" |
| 28829 | "uint3 __ovld __cnfn convert_uint3_rtp(float3);\n" |
| 28830 | "uint3 __ovld __cnfn convert_uint3_sat_rtp(float3);\n" |
| 28831 | "uint3 __ovld __cnfn convert_uint3_rtn(float3);\n" |
| 28832 | "uint3 __ovld __cnfn convert_uint3_sat_rtn(float3);\n" |
| 28833 | "uint3 __ovld __cnfn convert_uint3(float3);\n" |
| 28834 | "uint3 __ovld __cnfn convert_uint3_sat(float3);\n" |
| 28835 | "long3 __ovld __cnfn convert_long3_rte(char3);\n" |
| 28836 | "long3 __ovld __cnfn convert_long3_sat_rte(char3);\n" |
| 28837 | "long3 __ovld __cnfn convert_long3_rtz(char3);\n" |
| 28838 | "long3 __ovld __cnfn convert_long3_sat_rtz(char3);\n" |
| 28839 | "long3 __ovld __cnfn convert_long3_rtp(char3);\n" |
| 28840 | "long3 __ovld __cnfn convert_long3_sat_rtp(char3);\n" |
| 28841 | "long3 __ovld __cnfn convert_long3_rtn(char3);\n" |
| 28842 | "long3 __ovld __cnfn convert_long3_sat_rtn(char3);\n" |
| 28843 | "long3 __ovld __cnfn convert_long3(char3);\n" |
| 28844 | "long3 __ovld __cnfn convert_long3_sat(char3);\n" |
| 28845 | "long3 __ovld __cnfn convert_long3_rte(uchar3);\n" |
| 28846 | "long3 __ovld __cnfn convert_long3_sat_rte(uchar3);\n" |
| 28847 | "long3 __ovld __cnfn convert_long3_rtz(uchar3);\n" |
| 28848 | "long3 __ovld __cnfn convert_long3_sat_rtz(uchar3);\n" |
| 28849 | "long3 __ovld __cnfn convert_long3_rtp(uchar3);\n" |
| 28850 | "long3 __ovld __cnfn convert_long3_sat_rtp(uchar3);\n" |
| 28851 | "long3 __ovld __cnfn convert_long3_rtn(uchar3);\n" |
| 28852 | "long3 __ovld __cnfn convert_long3_sat_rtn(uchar3);\n" |
| 28853 | "long3 __ovld __cnfn convert_long3(uchar3);\n" |
| 28854 | "long3 __ovld __cnfn convert_long3_sat(uchar3);\n" |
| 28855 | "long3 __ovld __cnfn convert_long3_rte(short3);\n" |
| 28856 | "long3 __ovld __cnfn convert_long3_sat_rte(short3);\n" |
| 28857 | "long3 __ovld __cnfn convert_long3_rtz(short3);\n" |
| 28858 | "long3 __ovld __cnfn convert_long3_sat_rtz(short3);\n" |
| 28859 | "long3 __ovld __cnfn convert_long3_rtp(short3);\n" |
| 28860 | "long3 __ovld __cnfn convert_long3_sat_rtp(short3);\n" |
| 28861 | "long3 __ovld __cnfn convert_long3_rtn(short3);\n" |
| 28862 | "long3 __ovld __cnfn convert_long3_sat_rtn(short3);\n" |
| 28863 | "long3 __ovld __cnfn convert_long3(short3);\n" |
| 28864 | "long3 __ovld __cnfn convert_long3_sat(short3);\n" |
| 28865 | "long3 __ovld __cnfn convert_long3_rte(ushort3);\n" |
| 28866 | "long3 __ovld __cnfn convert_long3_sat_rte(ushort3);\n" |
| 28867 | "long3 __ovld __cnfn convert_long3_rtz(ushort3);\n" |
| 28868 | "long3 __ovld __cnfn convert_long3_sat_rtz(ushort3);\n" |
| 28869 | "long3 __ovld __cnfn convert_long3_rtp(ushort3);\n" |
| 28870 | "long3 __ovld __cnfn convert_long3_sat_rtp(ushort3);\n" |
| 28871 | "long3 __ovld __cnfn convert_long3_rtn(ushort3);\n" |
| 28872 | "long3 __ovld __cnfn convert_long3_sat_rtn(ushort3);\n" |
| 28873 | "long3 __ovld __cnfn convert_long3(ushort3);\n" |
| 28874 | "long3 __ovld __cnfn convert_long3_sat(ushort3);\n" |
| 28875 | "long3 __ovld __cnfn convert_long3_rte(int3);\n" |
| 28876 | "long3 __ovld __cnfn convert_long3_sat_rte(int3);\n" |
| 28877 | "long3 __ovld __cnfn convert_long3_rtz(int3);\n" |
| 28878 | "long3 __ovld __cnfn convert_long3_sat_rtz(int3);\n" |
| 28879 | "long3 __ovld __cnfn convert_long3_rtp(int3);\n" |
| 28880 | "long3 __ovld __cnfn convert_long3_sat_rtp(int3);\n" |
| 28881 | "long3 __ovld __cnfn convert_long3_rtn(int3);\n" |
| 28882 | "long3 __ovld __cnfn convert_long3_sat_rtn(int3);\n" |
| 28883 | "long3 __ovld __cnfn convert_long3(int3);\n" |
| 28884 | "long3 __ovld __cnfn convert_long3_sat(int3);\n" |
| 28885 | "long3 __ovld __cnfn convert_long3_rte(uint3);\n" |
| 28886 | "long3 __ovld __cnfn convert_long3_sat_rte(uint3);\n" |
| 28887 | "long3 __ovld __cnfn convert_long3_rtz(uint3);\n" |
| 28888 | "long3 __ovld __cnfn convert_long3_sat_rtz(uint3);\n" |
| 28889 | "long3 __ovld __cnfn convert_long3_rtp(uint3);\n" |
| 28890 | "long3 __ovld __cnfn convert_long3_sat_rtp(uint3);\n" |
| 28891 | "long3 __ovld __cnfn convert_long3_rtn(uint3);\n" |
| 28892 | "long3 __ovld __cnfn convert_long3_sat_rtn(uint3);\n" |
| 28893 | "long3 __ovld __cnfn convert_long3(uint3);\n" |
| 28894 | "long3 __ovld __cnfn convert_long3_sat(uint3);\n" |
| 28895 | "long3 __ovld __cnfn convert_long3_rte(long3);\n" |
| 28896 | "long3 __ovld __cnfn convert_long3_sat_rte(long3);\n" |
| 28897 | "long3 __ovld __cnfn convert_long3_rtz(long3);\n" |
| 28898 | "long3 __ovld __cnfn convert_long3_sat_rtz(long3);\n" |
| 28899 | "long3 __ovld __cnfn convert_long3_rtp(long3);\n" |
| 28900 | "long3 __ovld __cnfn convert_long3_sat_rtp(long3);\n" |
| 28901 | "long3 __ovld __cnfn convert_long3_rtn(long3);\n" |
| 28902 | "long3 __ovld __cnfn convert_long3_sat_rtn(long3);\n" |
| 28903 | "long3 __ovld __cnfn convert_long3(long3);\n" |
| 28904 | "long3 __ovld __cnfn convert_long3_sat(long3);\n" |
| 28905 | "long3 __ovld __cnfn convert_long3_rte(ulong3);\n" |
| 28906 | "long3 __ovld __cnfn convert_long3_sat_rte(ulong3);\n" |
| 28907 | "long3 __ovld __cnfn convert_long3_rtz(ulong3);\n" |
| 28908 | "long3 __ovld __cnfn convert_long3_sat_rtz(ulong3);\n" |
| 28909 | "long3 __ovld __cnfn convert_long3_rtp(ulong3);\n" |
| 28910 | "long3 __ovld __cnfn convert_long3_sat_rtp(ulong3);\n" |
| 28911 | "long3 __ovld __cnfn convert_long3_rtn(ulong3);\n" |
| 28912 | "long3 __ovld __cnfn convert_long3_sat_rtn(ulong3);\n" |
| 28913 | "long3 __ovld __cnfn convert_long3(ulong3);\n" |
| 28914 | "long3 __ovld __cnfn convert_long3_sat(ulong3);\n" |
| 28915 | "long3 __ovld __cnfn convert_long3_rte(float3);\n" |
| 28916 | "long3 __ovld __cnfn convert_long3_sat_rte(float3);\n" |
| 28917 | "long3 __ovld __cnfn convert_long3_rtz(float3);\n" |
| 28918 | "long3 __ovld __cnfn convert_long3_sat_rtz(float3);\n" |
| 28919 | "long3 __ovld __cnfn convert_long3_rtp(float3);\n" |
| 28920 | "long3 __ovld __cnfn convert_long3_sat_rtp(float3);\n" |
| 28921 | "long3 __ovld __cnfn convert_long3_rtn(float3);\n" |
| 28922 | "long3 __ovld __cnfn convert_long3_sat_rtn(float3);\n" |
| 28923 | "long3 __ovld __cnfn convert_long3(float3);\n" |
| 28924 | "long3 __ovld __cnfn convert_long3_sat(float3);\n" |
| 28925 | "ulong3 __ovld __cnfn convert_ulong3_rte(char3);\n" |
| 28926 | "ulong3 __ovld __cnfn convert_ulong3_sat_rte(char3);\n" |
| 28927 | "ulong3 __ovld __cnfn convert_ulong3_rtz(char3);\n" |
| 28928 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtz(char3);\n" |
| 28929 | "ulong3 __ovld __cnfn convert_ulong3_rtp(char3);\n" |
| 28930 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtp(char3);\n" |
| 28931 | "ulong3 __ovld __cnfn convert_ulong3_rtn(char3);\n" |
| 28932 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtn(char3);\n" |
| 28933 | "ulong3 __ovld __cnfn convert_ulong3(char3);\n" |
| 28934 | "ulong3 __ovld __cnfn convert_ulong3_sat(char3);\n" |
| 28935 | "ulong3 __ovld __cnfn convert_ulong3_rte(uchar3);\n" |
| 28936 | "ulong3 __ovld __cnfn convert_ulong3_sat_rte(uchar3);\n" |
| 28937 | "ulong3 __ovld __cnfn convert_ulong3_rtz(uchar3);\n" |
| 28938 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtz(uchar3);\n" |
| 28939 | "ulong3 __ovld __cnfn convert_ulong3_rtp(uchar3);\n" |
| 28940 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtp(uchar3);\n" |
| 28941 | "ulong3 __ovld __cnfn convert_ulong3_rtn(uchar3);\n" |
| 28942 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtn(uchar3);\n" |
| 28943 | "ulong3 __ovld __cnfn convert_ulong3(uchar3);\n" |
| 28944 | "ulong3 __ovld __cnfn convert_ulong3_sat(uchar3);\n" |
| 28945 | "ulong3 __ovld __cnfn convert_ulong3_rte(short3);\n" |
| 28946 | "ulong3 __ovld __cnfn convert_ulong3_sat_rte(short3);\n" |
| 28947 | "ulong3 __ovld __cnfn convert_ulong3_rtz(short3);\n" |
| 28948 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtz(short3);\n" |
| 28949 | "ulong3 __ovld __cnfn convert_ulong3_rtp(short3);\n" |
| 28950 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtp(short3);\n" |
| 28951 | "ulong3 __ovld __cnfn convert_ulong3_rtn(short3);\n" |
| 28952 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtn(short3);\n" |
| 28953 | "ulong3 __ovld __cnfn convert_ulong3(short3);\n" |
| 28954 | "ulong3 __ovld __cnfn convert_ulong3_sat(short3);\n" |
| 28955 | "ulong3 __ovld __cnfn convert_ulong3_rte(ushort3);\n" |
| 28956 | "ulong3 __ovld __cnfn convert_ulong3_sat_rte(ushort3);\n" |
| 28957 | "ulong3 __ovld __cnfn convert_ulong3_rtz(ushort3);\n" |
| 28958 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtz(ushort3);\n" |
| 28959 | "ulong3 __ovld __cnfn convert_ulong3_rtp(ushort3);\n" |
| 28960 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtp(ushort3);\n" |
| 28961 | "ulong3 __ovld __cnfn convert_ulong3_rtn(ushort3);\n" |
| 28962 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtn(ushort3);\n" |
| 28963 | "ulong3 __ovld __cnfn convert_ulong3(ushort3);\n" |
| 28964 | "ulong3 __ovld __cnfn convert_ulong3_sat(ushort3);\n" |
| 28965 | "ulong3 __ovld __cnfn convert_ulong3_rte(int3);\n" |
| 28966 | "ulong3 __ovld __cnfn convert_ulong3_sat_rte(int3);\n" |
| 28967 | "ulong3 __ovld __cnfn convert_ulong3_rtz(int3);\n" |
| 28968 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtz(int3);\n" |
| 28969 | "ulong3 __ovld __cnfn convert_ulong3_rtp(int3);\n" |
| 28970 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtp(int3);\n" |
| 28971 | "ulong3 __ovld __cnfn convert_ulong3_rtn(int3);\n" |
| 28972 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtn(int3);\n" |
| 28973 | "ulong3 __ovld __cnfn convert_ulong3(int3);\n" |
| 28974 | "ulong3 __ovld __cnfn convert_ulong3_sat(int3);\n" |
| 28975 | "ulong3 __ovld __cnfn convert_ulong3_rte(uint3);\n" |
| 28976 | "ulong3 __ovld __cnfn convert_ulong3_sat_rte(uint3);\n" |
| 28977 | "ulong3 __ovld __cnfn convert_ulong3_rtz(uint3);\n" |
| 28978 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtz(uint3);\n" |
| 28979 | "ulong3 __ovld __cnfn convert_ulong3_rtp(uint3);\n" |
| 28980 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtp(uint3);\n" |
| 28981 | "ulong3 __ovld __cnfn convert_ulong3_rtn(uint3);\n" |
| 28982 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtn(uint3);\n" |
| 28983 | "ulong3 __ovld __cnfn convert_ulong3(uint3);\n" |
| 28984 | "ulong3 __ovld __cnfn convert_ulong3_sat(uint3);\n" |
| 28985 | "ulong3 __ovld __cnfn convert_ulong3_rte(long3);\n" |
| 28986 | "ulong3 __ovld __cnfn convert_ulong3_sat_rte(long3);\n" |
| 28987 | "ulong3 __ovld __cnfn convert_ulong3_rtz(long3);\n" |
| 28988 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtz(long3);\n" |
| 28989 | "ulong3 __ovld __cnfn convert_ulong3_rtp(long3);\n" |
| 28990 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtp(long3);\n" |
| 28991 | "ulong3 __ovld __cnfn convert_ulong3_rtn(long3);\n" |
| 28992 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtn(long3);\n" |
| 28993 | "ulong3 __ovld __cnfn convert_ulong3(long3);\n" |
| 28994 | "ulong3 __ovld __cnfn convert_ulong3_sat(long3);\n" |
| 28995 | "ulong3 __ovld __cnfn convert_ulong3_rte(ulong3);\n" |
| 28996 | "ulong3 __ovld __cnfn convert_ulong3_sat_rte(ulong3);\n" |
| 28997 | "ulong3 __ovld __cnfn convert_ulong3_rtz(ulong3);\n" |
| 28998 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtz(ulong3);\n" |
| 28999 | "ulong3 __ovld __cnfn convert_ulong3_rtp(ulong3);\n" |
| 29000 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtp(ulong3);\n" |
| 29001 | "ulong3 __ovld __cnfn convert_ulong3_rtn(ulong3);\n" |
| 29002 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtn(ulong3);\n" |
| 29003 | "ulong3 __ovld __cnfn convert_ulong3(ulong3);\n" |
| 29004 | "ulong3 __ovld __cnfn convert_ulong3_sat(ulong3);\n" |
| 29005 | "ulong3 __ovld __cnfn convert_ulong3_rte(float3);\n" |
| 29006 | "ulong3 __ovld __cnfn convert_ulong3_sat_rte(float3);\n" |
| 29007 | "ulong3 __ovld __cnfn convert_ulong3_rtz(float3);\n" |
| 29008 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtz(float3);\n" |
| 29009 | "ulong3 __ovld __cnfn convert_ulong3_rtp(float3);\n" |
| 29010 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtp(float3);\n" |
| 29011 | "ulong3 __ovld __cnfn convert_ulong3_rtn(float3);\n" |
| 29012 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtn(float3);\n" |
| 29013 | "ulong3 __ovld __cnfn convert_ulong3(float3);\n" |
| 29014 | "ulong3 __ovld __cnfn convert_ulong3_sat(float3);\n" |
| 29015 | "float3 __ovld __cnfn convert_float3_rte(char3);\n" |
| 29016 | "float3 __ovld __cnfn convert_float3_rtz(char3);\n" |
| 29017 | "float3 __ovld __cnfn convert_float3_rtp(char3);\n" |
| 29018 | "float3 __ovld __cnfn convert_float3_rtn(char3);\n" |
| 29019 | "float3 __ovld __cnfn convert_float3(char3);\n" |
| 29020 | "float3 __ovld __cnfn convert_float3_rte(uchar3);\n" |
| 29021 | "float3 __ovld __cnfn convert_float3_rtz(uchar3);\n" |
| 29022 | "float3 __ovld __cnfn convert_float3_rtp(uchar3);\n" |
| 29023 | "float3 __ovld __cnfn convert_float3_rtn(uchar3);\n" |
| 29024 | "float3 __ovld __cnfn convert_float3(uchar3);\n" |
| 29025 | "float3 __ovld __cnfn convert_float3_rte(short3);\n" |
| 29026 | "float3 __ovld __cnfn convert_float3_rtz(short3);\n" |
| 29027 | "float3 __ovld __cnfn convert_float3_rtp(short3);\n" |
| 29028 | "float3 __ovld __cnfn convert_float3_rtn(short3);\n" |
| 29029 | "float3 __ovld __cnfn convert_float3(short3);\n" |
| 29030 | "float3 __ovld __cnfn convert_float3_rte(ushort3);\n" |
| 29031 | "float3 __ovld __cnfn convert_float3_rtz(ushort3);\n" |
| 29032 | "float3 __ovld __cnfn convert_float3_rtp(ushort3);\n" |
| 29033 | "float3 __ovld __cnfn convert_float3_rtn(ushort3);\n" |
| 29034 | "float3 __ovld __cnfn convert_float3(ushort3);\n" |
| 29035 | "float3 __ovld __cnfn convert_float3_rte(int3);\n" |
| 29036 | "float3 __ovld __cnfn convert_float3_rtz(int3);\n" |
| 29037 | "float3 __ovld __cnfn convert_float3_rtp(int3);\n" |
| 29038 | "float3 __ovld __cnfn convert_float3_rtn(int3);\n" |
| 29039 | "float3 __ovld __cnfn convert_float3(int3);\n" |
| 29040 | "float3 __ovld __cnfn convert_float3_rte(uint3);\n" |
| 29041 | "float3 __ovld __cnfn convert_float3_rtz(uint3);\n" |
| 29042 | "float3 __ovld __cnfn convert_float3_rtp(uint3);\n" |
| 29043 | "float3 __ovld __cnfn convert_float3_rtn(uint3);\n" |
| 29044 | "float3 __ovld __cnfn convert_float3(uint3);\n" |
| 29045 | "float3 __ovld __cnfn convert_float3_rte(long3);\n" |
| 29046 | "float3 __ovld __cnfn convert_float3_rtz(long3);\n" |
| 29047 | "float3 __ovld __cnfn convert_float3_rtp(long3);\n" |
| 29048 | "float3 __ovld __cnfn convert_float3_rtn(long3);\n" |
| 29049 | "float3 __ovld __cnfn convert_float3(long3);\n" |
| 29050 | "float3 __ovld __cnfn convert_float3_rte(ulong3);\n" |
| 29051 | "float3 __ovld __cnfn convert_float3_rtz(ulong3);\n" |
| 29052 | "float3 __ovld __cnfn convert_float3_rtp(ulong3);\n" |
| 29053 | "float3 __ovld __cnfn convert_float3_rtn(ulong3);\n" |
| 29054 | "float3 __ovld __cnfn convert_float3(ulong3);\n" |
| 29055 | "float3 __ovld __cnfn convert_float3_rte(float3);\n" |
| 29056 | "float3 __ovld __cnfn convert_float3_rtz(float3);\n" |
| 29057 | "float3 __ovld __cnfn convert_float3_rtp(float3);\n" |
| 29058 | "float3 __ovld __cnfn convert_float3_rtn(float3);\n" |
| 29059 | "float3 __ovld __cnfn convert_float3(float3);\n" |
| 29060 | "char4 __ovld __cnfn convert_char4_rte(char4);\n" |
| 29061 | "char4 __ovld __cnfn convert_char4_sat_rte(char4);\n" |
| 29062 | "char4 __ovld __cnfn convert_char4_rtz(char4);\n" |
| 29063 | "char4 __ovld __cnfn convert_char4_sat_rtz(char4);\n" |
| 29064 | "char4 __ovld __cnfn convert_char4_rtp(char4);\n" |
| 29065 | "char4 __ovld __cnfn convert_char4_sat_rtp(char4);\n" |
| 29066 | "char4 __ovld __cnfn convert_char4_rtn(char4);\n" |
| 29067 | "char4 __ovld __cnfn convert_char4_sat_rtn(char4);\n" |
| 29068 | "char4 __ovld __cnfn convert_char4(char4);\n" |
| 29069 | "char4 __ovld __cnfn convert_char4_sat(char4);\n" |
| 29070 | "char4 __ovld __cnfn convert_char4_rte(uchar4);\n" |
| 29071 | "char4 __ovld __cnfn convert_char4_sat_rte(uchar4);\n" |
| 29072 | "char4 __ovld __cnfn convert_char4_rtz(uchar4);\n" |
| 29073 | "char4 __ovld __cnfn convert_char4_sat_rtz(uchar4);\n" |
| 29074 | "char4 __ovld __cnfn convert_char4_rtp(uchar4);\n" |
| 29075 | "char4 __ovld __cnfn convert_char4_sat_rtp(uchar4);\n" |
| 29076 | "char4 __ovld __cnfn convert_char4_rtn(uchar4);\n" |
| 29077 | "char4 __ovld __cnfn convert_char4_sat_rtn(uchar4);\n" |
| 29078 | "char4 __ovld __cnfn convert_char4(uchar4);\n" |
| 29079 | "char4 __ovld __cnfn convert_char4_sat(uchar4);\n" |
| 29080 | "char4 __ovld __cnfn convert_char4_rte(short4);\n" |
| 29081 | "char4 __ovld __cnfn convert_char4_sat_rte(short4);\n" |
| 29082 | "char4 __ovld __cnfn convert_char4_rtz(short4);\n" |
| 29083 | "char4 __ovld __cnfn convert_char4_sat_rtz(short4);\n" |
| 29084 | "char4 __ovld __cnfn convert_char4_rtp(short4);\n" |
| 29085 | "char4 __ovld __cnfn convert_char4_sat_rtp(short4);\n" |
| 29086 | "char4 __ovld __cnfn convert_char4_rtn(short4);\n" |
| 29087 | "char4 __ovld __cnfn convert_char4_sat_rtn(short4);\n" |
| 29088 | "char4 __ovld __cnfn convert_char4(short4);\n" |
| 29089 | "char4 __ovld __cnfn convert_char4_sat(short4);\n" |
| 29090 | "char4 __ovld __cnfn convert_char4_rte(ushort4);\n" |
| 29091 | "char4 __ovld __cnfn convert_char4_sat_rte(ushort4);\n" |
| 29092 | "char4 __ovld __cnfn convert_char4_rtz(ushort4);\n" |
| 29093 | "char4 __ovld __cnfn convert_char4_sat_rtz(ushort4);\n" |
| 29094 | "char4 __ovld __cnfn convert_char4_rtp(ushort4);\n" |
| 29095 | "char4 __ovld __cnfn convert_char4_sat_rtp(ushort4);\n" |
| 29096 | "char4 __ovld __cnfn convert_char4_rtn(ushort4);\n" |
| 29097 | "char4 __ovld __cnfn convert_char4_sat_rtn(ushort4);\n" |
| 29098 | "char4 __ovld __cnfn convert_char4(ushort4);\n" |
| 29099 | "char4 __ovld __cnfn convert_char4_sat(ushort4);\n" |
| 29100 | "char4 __ovld __cnfn convert_char4_rte(int4);\n" |
| 29101 | "char4 __ovld __cnfn convert_char4_sat_rte(int4);\n" |
| 29102 | "char4 __ovld __cnfn convert_char4_rtz(int4);\n" |
| 29103 | "char4 __ovld __cnfn convert_char4_sat_rtz(int4);\n" |
| 29104 | "char4 __ovld __cnfn convert_char4_rtp(int4);\n" |
| 29105 | "char4 __ovld __cnfn convert_char4_sat_rtp(int4);\n" |
| 29106 | "char4 __ovld __cnfn convert_char4_rtn(int4);\n" |
| 29107 | "char4 __ovld __cnfn convert_char4_sat_rtn(int4);\n" |
| 29108 | "char4 __ovld __cnfn convert_char4(int4);\n" |
| 29109 | "char4 __ovld __cnfn convert_char4_sat(int4);\n" |
| 29110 | "char4 __ovld __cnfn convert_char4_rte(uint4);\n" |
| 29111 | "char4 __ovld __cnfn convert_char4_sat_rte(uint4);\n" |
| 29112 | "char4 __ovld __cnfn convert_char4_rtz(uint4);\n" |
| 29113 | "char4 __ovld __cnfn convert_char4_sat_rtz(uint4);\n" |
| 29114 | "char4 __ovld __cnfn convert_char4_rtp(uint4);\n" |
| 29115 | "char4 __ovld __cnfn convert_char4_sat_rtp(uint4);\n" |
| 29116 | "char4 __ovld __cnfn convert_char4_rtn(uint4);\n" |
| 29117 | "char4 __ovld __cnfn convert_char4_sat_rtn(uint4);\n" |
| 29118 | "char4 __ovld __cnfn convert_char4(uint4);\n" |
| 29119 | "char4 __ovld __cnfn convert_char4_sat(uint4);\n" |
| 29120 | "char4 __ovld __cnfn convert_char4_rte(long4);\n" |
| 29121 | "char4 __ovld __cnfn convert_char4_sat_rte(long4);\n" |
| 29122 | "char4 __ovld __cnfn convert_char4_rtz(long4);\n" |
| 29123 | "char4 __ovld __cnfn convert_char4_sat_rtz(long4);\n" |
| 29124 | "char4 __ovld __cnfn convert_char4_rtp(long4);\n" |
| 29125 | "char4 __ovld __cnfn convert_char4_sat_rtp(long4);\n" |
| 29126 | "char4 __ovld __cnfn convert_char4_rtn(long4);\n" |
| 29127 | "char4 __ovld __cnfn convert_char4_sat_rtn(long4);\n" |
| 29128 | "char4 __ovld __cnfn convert_char4(long4);\n" |
| 29129 | "char4 __ovld __cnfn convert_char4_sat(long4);\n" |
| 29130 | "char4 __ovld __cnfn convert_char4_rte(ulong4);\n" |
| 29131 | "char4 __ovld __cnfn convert_char4_sat_rte(ulong4);\n" |
| 29132 | "char4 __ovld __cnfn convert_char4_rtz(ulong4);\n" |
| 29133 | "char4 __ovld __cnfn convert_char4_sat_rtz(ulong4);\n" |
| 29134 | "char4 __ovld __cnfn convert_char4_rtp(ulong4);\n" |
| 29135 | "char4 __ovld __cnfn convert_char4_sat_rtp(ulong4);\n" |
| 29136 | "char4 __ovld __cnfn convert_char4_rtn(ulong4);\n" |
| 29137 | "char4 __ovld __cnfn convert_char4_sat_rtn(ulong4);\n" |
| 29138 | "char4 __ovld __cnfn convert_char4(ulong4);\n" |
| 29139 | "char4 __ovld __cnfn convert_char4_sat(ulong4);\n" |
| 29140 | "char4 __ovld __cnfn convert_char4_rte(float4);\n" |
| 29141 | "char4 __ovld __cnfn convert_char4_sat_rte(float4);\n" |
| 29142 | "char4 __ovld __cnfn convert_char4_rtz(float4);\n" |
| 29143 | "char4 __ovld __cnfn convert_char4_sat_rtz(float4);\n" |
| 29144 | "char4 __ovld __cnfn convert_char4_rtp(float4);\n" |
| 29145 | "char4 __ovld __cnfn convert_char4_sat_rtp(float4);\n" |
| 29146 | "char4 __ovld __cnfn convert_char4_rtn(float4);\n" |
| 29147 | "char4 __ovld __cnfn convert_char4_sat_rtn(float4);\n" |
| 29148 | "char4 __ovld __cnfn convert_char4(float4);\n" |
| 29149 | "char4 __ovld __cnfn convert_char4_sat(float4);\n" |
| 29150 | "uchar4 __ovld __cnfn convert_uchar4_rte(char4);\n" |
| 29151 | "uchar4 __ovld __cnfn convert_uchar4_sat_rte(char4);\n" |
| 29152 | "uchar4 __ovld __cnfn convert_uchar4_rtz(char4);\n" |
| 29153 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtz(char4);\n" |
| 29154 | "uchar4 __ovld __cnfn convert_uchar4_rtp(char4);\n" |
| 29155 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtp(char4);\n" |
| 29156 | "uchar4 __ovld __cnfn convert_uchar4_rtn(char4);\n" |
| 29157 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtn(char4);\n" |
| 29158 | "uchar4 __ovld __cnfn convert_uchar4(char4);\n" |
| 29159 | "uchar4 __ovld __cnfn convert_uchar4_sat(char4);\n" |
| 29160 | "uchar4 __ovld __cnfn convert_uchar4_rte(uchar4);\n" |
| 29161 | "uchar4 __ovld __cnfn convert_uchar4_sat_rte(uchar4);\n" |
| 29162 | "uchar4 __ovld __cnfn convert_uchar4_rtz(uchar4);\n" |
| 29163 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtz(uchar4);\n" |
| 29164 | "uchar4 __ovld __cnfn convert_uchar4_rtp(uchar4);\n" |
| 29165 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtp(uchar4);\n" |
| 29166 | "uchar4 __ovld __cnfn convert_uchar4_rtn(uchar4);\n" |
| 29167 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtn(uchar4);\n" |
| 29168 | "uchar4 __ovld __cnfn convert_uchar4(uchar4);\n" |
| 29169 | "uchar4 __ovld __cnfn convert_uchar4_sat(uchar4);\n" |
| 29170 | "uchar4 __ovld __cnfn convert_uchar4_rte(short4);\n" |
| 29171 | "uchar4 __ovld __cnfn convert_uchar4_sat_rte(short4);\n" |
| 29172 | "uchar4 __ovld __cnfn convert_uchar4_rtz(short4);\n" |
| 29173 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtz(short4);\n" |
| 29174 | "uchar4 __ovld __cnfn convert_uchar4_rtp(short4);\n" |
| 29175 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtp(short4);\n" |
| 29176 | "uchar4 __ovld __cnfn convert_uchar4_rtn(short4);\n" |
| 29177 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtn(short4);\n" |
| 29178 | "uchar4 __ovld __cnfn convert_uchar4(short4);\n" |
| 29179 | "uchar4 __ovld __cnfn convert_uchar4_sat(short4);\n" |
| 29180 | "uchar4 __ovld __cnfn convert_uchar4_rte(ushort4);\n" |
| 29181 | "uchar4 __ovld __cnfn convert_uchar4_sat_rte(ushort4);\n" |
| 29182 | "uchar4 __ovld __cnfn convert_uchar4_rtz(ushort4);\n" |
| 29183 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtz(ushort4);\n" |
| 29184 | "uchar4 __ovld __cnfn convert_uchar4_rtp(ushort4);\n" |
| 29185 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtp(ushort4);\n" |
| 29186 | "uchar4 __ovld __cnfn convert_uchar4_rtn(ushort4);\n" |
| 29187 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtn(ushort4);\n" |
| 29188 | "uchar4 __ovld __cnfn convert_uchar4(ushort4);\n" |
| 29189 | "uchar4 __ovld __cnfn convert_uchar4_sat(ushort4);\n" |
| 29190 | "uchar4 __ovld __cnfn convert_uchar4_rte(int4);\n" |
| 29191 | "uchar4 __ovld __cnfn convert_uchar4_sat_rte(int4);\n" |
| 29192 | "uchar4 __ovld __cnfn convert_uchar4_rtz(int4);\n" |
| 29193 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtz(int4);\n" |
| 29194 | "uchar4 __ovld __cnfn convert_uchar4_rtp(int4);\n" |
| 29195 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtp(int4);\n" |
| 29196 | "uchar4 __ovld __cnfn convert_uchar4_rtn(int4);\n" |
| 29197 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtn(int4);\n" |
| 29198 | "uchar4 __ovld __cnfn convert_uchar4(int4);\n" |
| 29199 | "uchar4 __ovld __cnfn convert_uchar4_sat(int4);\n" |
| 29200 | "uchar4 __ovld __cnfn convert_uchar4_rte(uint4);\n" |
| 29201 | "uchar4 __ovld __cnfn convert_uchar4_sat_rte(uint4);\n" |
| 29202 | "uchar4 __ovld __cnfn convert_uchar4_rtz(uint4);\n" |
| 29203 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtz(uint4);\n" |
| 29204 | "uchar4 __ovld __cnfn convert_uchar4_rtp(uint4);\n" |
| 29205 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtp(uint4);\n" |
| 29206 | "uchar4 __ovld __cnfn convert_uchar4_rtn(uint4);\n" |
| 29207 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtn(uint4);\n" |
| 29208 | "uchar4 __ovld __cnfn convert_uchar4(uint4);\n" |
| 29209 | "uchar4 __ovld __cnfn convert_uchar4_sat(uint4);\n" |
| 29210 | "uchar4 __ovld __cnfn convert_uchar4_rte(long4);\n" |
| 29211 | "uchar4 __ovld __cnfn convert_uchar4_sat_rte(long4);\n" |
| 29212 | "uchar4 __ovld __cnfn convert_uchar4_rtz(long4);\n" |
| 29213 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtz(long4);\n" |
| 29214 | "uchar4 __ovld __cnfn convert_uchar4_rtp(long4);\n" |
| 29215 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtp(long4);\n" |
| 29216 | "uchar4 __ovld __cnfn convert_uchar4_rtn(long4);\n" |
| 29217 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtn(long4);\n" |
| 29218 | "uchar4 __ovld __cnfn convert_uchar4(long4);\n" |
| 29219 | "uchar4 __ovld __cnfn convert_uchar4_sat(long4);\n" |
| 29220 | "uchar4 __ovld __cnfn convert_uchar4_rte(ulong4);\n" |
| 29221 | "uchar4 __ovld __cnfn convert_uchar4_sat_rte(ulong4);\n" |
| 29222 | "uchar4 __ovld __cnfn convert_uchar4_rtz(ulong4);\n" |
| 29223 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtz(ulong4);\n" |
| 29224 | "uchar4 __ovld __cnfn convert_uchar4_rtp(ulong4);\n" |
| 29225 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtp(ulong4);\n" |
| 29226 | "uchar4 __ovld __cnfn convert_uchar4_rtn(ulong4);\n" |
| 29227 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtn(ulong4);\n" |
| 29228 | "uchar4 __ovld __cnfn convert_uchar4(ulong4);\n" |
| 29229 | "uchar4 __ovld __cnfn convert_uchar4_sat(ulong4);\n" |
| 29230 | "uchar4 __ovld __cnfn convert_uchar4_rte(float4);\n" |
| 29231 | "uchar4 __ovld __cnfn convert_uchar4_sat_rte(float4);\n" |
| 29232 | "uchar4 __ovld __cnfn convert_uchar4_rtz(float4);\n" |
| 29233 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtz(float4);\n" |
| 29234 | "uchar4 __ovld __cnfn convert_uchar4_rtp(float4);\n" |
| 29235 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtp(float4);\n" |
| 29236 | "uchar4 __ovld __cnfn convert_uchar4_rtn(float4);\n" |
| 29237 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtn(float4);\n" |
| 29238 | "uchar4 __ovld __cnfn convert_uchar4(float4);\n" |
| 29239 | "uchar4 __ovld __cnfn convert_uchar4_sat(float4);\n" |
| 29240 | "short4 __ovld __cnfn convert_short4_rte(char4);\n" |
| 29241 | "short4 __ovld __cnfn convert_short4_sat_rte(char4);\n" |
| 29242 | "short4 __ovld __cnfn convert_short4_rtz(char4);\n" |
| 29243 | "short4 __ovld __cnfn convert_short4_sat_rtz(char4);\n" |
| 29244 | "short4 __ovld __cnfn convert_short4_rtp(char4);\n" |
| 29245 | "short4 __ovld __cnfn convert_short4_sat_rtp(char4);\n" |
| 29246 | "short4 __ovld __cnfn convert_short4_rtn(char4);\n" |
| 29247 | "short4 __ovld __cnfn convert_short4_sat_rtn(char4);\n" |
| 29248 | "short4 __ovld __cnfn convert_short4(char4);\n" |
| 29249 | "short4 __ovld __cnfn convert_short4_sat(char4);\n" |
| 29250 | "short4 __ovld __cnfn convert_short4_rte(uchar4);\n" |
| 29251 | "short4 __ovld __cnfn convert_short4_sat_rte(uchar4);\n" |
| 29252 | "short4 __ovld __cnfn convert_short4_rtz(uchar4);\n" |
| 29253 | "short4 __ovld __cnfn convert_short4_sat_rtz(uchar4);\n" |
| 29254 | "short4 __ovld __cnfn convert_short4_rtp(uchar4);\n" |
| 29255 | "short4 __ovld __cnfn convert_short4_sat_rtp(uchar4);\n" |
| 29256 | "short4 __ovld __cnfn convert_short4_rtn(uchar4);\n" |
| 29257 | "short4 __ovld __cnfn convert_short4_sat_rtn(uchar4);\n" |
| 29258 | "short4 __ovld __cnfn convert_short4(uchar4);\n" |
| 29259 | "short4 __ovld __cnfn convert_short4_sat(uchar4);\n" |
| 29260 | "short4 __ovld __cnfn convert_short4_rte(short4);\n" |
| 29261 | "short4 __ovld __cnfn convert_short4_sat_rte(short4);\n" |
| 29262 | "short4 __ovld __cnfn convert_short4_rtz(short4);\n" |
| 29263 | "short4 __ovld __cnfn convert_short4_sat_rtz(short4);\n" |
| 29264 | "short4 __ovld __cnfn convert_short4_rtp(short4);\n" |
| 29265 | "short4 __ovld __cnfn convert_short4_sat_rtp(short4);\n" |
| 29266 | "short4 __ovld __cnfn convert_short4_rtn(short4);\n" |
| 29267 | "short4 __ovld __cnfn convert_short4_sat_rtn(short4);\n" |
| 29268 | "short4 __ovld __cnfn convert_short4(short4);\n" |
| 29269 | "short4 __ovld __cnfn convert_short4_sat(short4);\n" |
| 29270 | "short4 __ovld __cnfn convert_short4_rte(ushort4);\n" |
| 29271 | "short4 __ovld __cnfn convert_short4_sat_rte(ushort4);\n" |
| 29272 | "short4 __ovld __cnfn convert_short4_rtz(ushort4);\n" |
| 29273 | "short4 __ovld __cnfn convert_short4_sat_rtz(ushort4);\n" |
| 29274 | "short4 __ovld __cnfn convert_short4_rtp(ushort4);\n" |
| 29275 | "short4 __ovld __cnfn convert_short4_sat_rtp(ushort4);\n" |
| 29276 | "short4 __ovld __cnfn convert_short4_rtn(ushort4);\n" |
| 29277 | "short4 __ovld __cnfn convert_short4_sat_rtn(ushort4);\n" |
| 29278 | "short4 __ovld __cnfn convert_short4(ushort4);\n" |
| 29279 | "short4 __ovld __cnfn convert_short4_sat(ushort4);\n" |
| 29280 | "short4 __ovld __cnfn convert_short4_rte(int4);\n" |
| 29281 | "short4 __ovld __cnfn convert_short4_sat_rte(int4);\n" |
| 29282 | "short4 __ovld __cnfn convert_short4_rtz(int4);\n" |
| 29283 | "short4 __ovld __cnfn convert_short4_sat_rtz(int4);\n" |
| 29284 | "short4 __ovld __cnfn convert_short4_rtp(int4);\n" |
| 29285 | "short4 __ovld __cnfn convert_short4_sat_rtp(int4);\n" |
| 29286 | "short4 __ovld __cnfn convert_short4_rtn(int4);\n" |
| 29287 | "short4 __ovld __cnfn convert_short4_sat_rtn(int4);\n" |
| 29288 | "short4 __ovld __cnfn convert_short4(int4);\n" |
| 29289 | "short4 __ovld __cnfn convert_short4_sat(int4);\n" |
| 29290 | "short4 __ovld __cnfn convert_short4_rte(uint4);\n" |
| 29291 | "short4 __ovld __cnfn convert_short4_sat_rte(uint4);\n" |
| 29292 | "short4 __ovld __cnfn convert_short4_rtz(uint4);\n" |
| 29293 | "short4 __ovld __cnfn convert_short4_sat_rtz(uint4);\n" |
| 29294 | "short4 __ovld __cnfn convert_short4_rtp(uint4);\n" |
| 29295 | "short4 __ovld __cnfn convert_short4_sat_rtp(uint4);\n" |
| 29296 | "short4 __ovld __cnfn convert_short4_rtn(uint4);\n" |
| 29297 | "short4 __ovld __cnfn convert_short4_sat_rtn(uint4);\n" |
| 29298 | "short4 __ovld __cnfn convert_short4(uint4);\n" |
| 29299 | "short4 __ovld __cnfn convert_short4_sat(uint4);\n" |
| 29300 | "short4 __ovld __cnfn convert_short4_rte(long4);\n" |
| 29301 | "short4 __ovld __cnfn convert_short4_sat_rte(long4);\n" |
| 29302 | "short4 __ovld __cnfn convert_short4_rtz(long4);\n" |
| 29303 | "short4 __ovld __cnfn convert_short4_sat_rtz(long4);\n" |
| 29304 | "short4 __ovld __cnfn convert_short4_rtp(long4);\n" |
| 29305 | "short4 __ovld __cnfn convert_short4_sat_rtp(long4);\n" |
| 29306 | "short4 __ovld __cnfn convert_short4_rtn(long4);\n" |
| 29307 | "short4 __ovld __cnfn convert_short4_sat_rtn(long4);\n" |
| 29308 | "short4 __ovld __cnfn convert_short4(long4);\n" |
| 29309 | "short4 __ovld __cnfn convert_short4_sat(long4);\n" |
| 29310 | "short4 __ovld __cnfn convert_short4_rte(ulong4);\n" |
| 29311 | "short4 __ovld __cnfn convert_short4_sat_rte(ulong4);\n" |
| 29312 | "short4 __ovld __cnfn convert_short4_rtz(ulong4);\n" |
| 29313 | "short4 __ovld __cnfn convert_short4_sat_rtz(ulong4);\n" |
| 29314 | "short4 __ovld __cnfn convert_short4_rtp(ulong4);\n" |
| 29315 | "short4 __ovld __cnfn convert_short4_sat_rtp(ulong4);\n" |
| 29316 | "short4 __ovld __cnfn convert_short4_rtn(ulong4);\n" |
| 29317 | "short4 __ovld __cnfn convert_short4_sat_rtn(ulong4);\n" |
| 29318 | "short4 __ovld __cnfn convert_short4(ulong4);\n" |
| 29319 | "short4 __ovld __cnfn convert_short4_sat(ulong4);\n" |
| 29320 | "short4 __ovld __cnfn convert_short4_rte(float4);\n" |
| 29321 | "short4 __ovld __cnfn convert_short4_sat_rte(float4);\n" |
| 29322 | "short4 __ovld __cnfn convert_short4_rtz(float4);\n" |
| 29323 | "short4 __ovld __cnfn convert_short4_sat_rtz(float4);\n" |
| 29324 | "short4 __ovld __cnfn convert_short4_rtp(float4);\n" |
| 29325 | "short4 __ovld __cnfn convert_short4_sat_rtp(float4);\n" |
| 29326 | "short4 __ovld __cnfn convert_short4_rtn(float4);\n" |
| 29327 | "short4 __ovld __cnfn convert_short4_sat_rtn(float4);\n" |
| 29328 | "short4 __ovld __cnfn convert_short4(float4);\n" |
| 29329 | "short4 __ovld __cnfn convert_short4_sat(float4);\n" |
| 29330 | "ushort4 __ovld __cnfn convert_ushort4_rte(char4);\n" |
| 29331 | "ushort4 __ovld __cnfn convert_ushort4_sat_rte(char4);\n" |
| 29332 | "ushort4 __ovld __cnfn convert_ushort4_rtz(char4);\n" |
| 29333 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtz(char4);\n" |
| 29334 | "ushort4 __ovld __cnfn convert_ushort4_rtp(char4);\n" |
| 29335 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtp(char4);\n" |
| 29336 | "ushort4 __ovld __cnfn convert_ushort4_rtn(char4);\n" |
| 29337 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtn(char4);\n" |
| 29338 | "ushort4 __ovld __cnfn convert_ushort4(char4);\n" |
| 29339 | "ushort4 __ovld __cnfn convert_ushort4_sat(char4);\n" |
| 29340 | "ushort4 __ovld __cnfn convert_ushort4_rte(uchar4);\n" |
| 29341 | "ushort4 __ovld __cnfn convert_ushort4_sat_rte(uchar4);\n" |
| 29342 | "ushort4 __ovld __cnfn convert_ushort4_rtz(uchar4);\n" |
| 29343 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtz(uchar4);\n" |
| 29344 | "ushort4 __ovld __cnfn convert_ushort4_rtp(uchar4);\n" |
| 29345 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtp(uchar4);\n" |
| 29346 | "ushort4 __ovld __cnfn convert_ushort4_rtn(uchar4);\n" |
| 29347 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtn(uchar4);\n" |
| 29348 | "ushort4 __ovld __cnfn convert_ushort4(uchar4);\n" |
| 29349 | "ushort4 __ovld __cnfn convert_ushort4_sat(uchar4);\n" |
| 29350 | "ushort4 __ovld __cnfn convert_ushort4_rte(short4);\n" |
| 29351 | "ushort4 __ovld __cnfn convert_ushort4_sat_rte(short4);\n" |
| 29352 | "ushort4 __ovld __cnfn convert_ushort4_rtz(short4);\n" |
| 29353 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtz(short4);\n" |
| 29354 | "ushort4 __ovld __cnfn convert_ushort4_rtp(short4);\n" |
| 29355 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtp(short4);\n" |
| 29356 | "ushort4 __ovld __cnfn convert_ushort4_rtn(short4);\n" |
| 29357 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtn(short4);\n" |
| 29358 | "ushort4 __ovld __cnfn convert_ushort4(short4);\n" |
| 29359 | "ushort4 __ovld __cnfn convert_ushort4_sat(short4);\n" |
| 29360 | "ushort4 __ovld __cnfn convert_ushort4_rte(ushort4);\n" |
| 29361 | "ushort4 __ovld __cnfn convert_ushort4_sat_rte(ushort4);\n" |
| 29362 | "ushort4 __ovld __cnfn convert_ushort4_rtz(ushort4);\n" |
| 29363 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtz(ushort4);\n" |
| 29364 | "ushort4 __ovld __cnfn convert_ushort4_rtp(ushort4);\n" |
| 29365 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtp(ushort4);\n" |
| 29366 | "ushort4 __ovld __cnfn convert_ushort4_rtn(ushort4);\n" |
| 29367 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtn(ushort4);\n" |
| 29368 | "ushort4 __ovld __cnfn convert_ushort4(ushort4);\n" |
| 29369 | "ushort4 __ovld __cnfn convert_ushort4_sat(ushort4);\n" |
| 29370 | "ushort4 __ovld __cnfn convert_ushort4_rte(int4);\n" |
| 29371 | "ushort4 __ovld __cnfn convert_ushort4_sat_rte(int4);\n" |
| 29372 | "ushort4 __ovld __cnfn convert_ushort4_rtz(int4);\n" |
| 29373 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtz(int4);\n" |
| 29374 | "ushort4 __ovld __cnfn convert_ushort4_rtp(int4);\n" |
| 29375 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtp(int4);\n" |
| 29376 | "ushort4 __ovld __cnfn convert_ushort4_rtn(int4);\n" |
| 29377 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtn(int4);\n" |
| 29378 | "ushort4 __ovld __cnfn convert_ushort4(int4);\n" |
| 29379 | "ushort4 __ovld __cnfn convert_ushort4_sat(int4);\n" |
| 29380 | "ushort4 __ovld __cnfn convert_ushort4_rte(uint4);\n" |
| 29381 | "ushort4 __ovld __cnfn convert_ushort4_sat_rte(uint4);\n" |
| 29382 | "ushort4 __ovld __cnfn convert_ushort4_rtz(uint4);\n" |
| 29383 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtz(uint4);\n" |
| 29384 | "ushort4 __ovld __cnfn convert_ushort4_rtp(uint4);\n" |
| 29385 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtp(uint4);\n" |
| 29386 | "ushort4 __ovld __cnfn convert_ushort4_rtn(uint4);\n" |
| 29387 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtn(uint4);\n" |
| 29388 | "ushort4 __ovld __cnfn convert_ushort4(uint4);\n" |
| 29389 | "ushort4 __ovld __cnfn convert_ushort4_sat(uint4);\n" |
| 29390 | "ushort4 __ovld __cnfn convert_ushort4_rte(long4);\n" |
| 29391 | "ushort4 __ovld __cnfn convert_ushort4_sat_rte(long4);\n" |
| 29392 | "ushort4 __ovld __cnfn convert_ushort4_rtz(long4);\n" |
| 29393 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtz(long4);\n" |
| 29394 | "ushort4 __ovld __cnfn convert_ushort4_rtp(long4);\n" |
| 29395 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtp(long4);\n" |
| 29396 | "ushort4 __ovld __cnfn convert_ushort4_rtn(long4);\n" |
| 29397 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtn(long4);\n" |
| 29398 | "ushort4 __ovld __cnfn convert_ushort4(long4);\n" |
| 29399 | "ushort4 __ovld __cnfn convert_ushort4_sat(long4);\n" |
| 29400 | "ushort4 __ovld __cnfn convert_ushort4_rte(ulong4);\n" |
| 29401 | "ushort4 __ovld __cnfn convert_ushort4_sat_rte(ulong4);\n" |
| 29402 | "ushort4 __ovld __cnfn convert_ushort4_rtz(ulong4);\n" |
| 29403 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtz(ulong4);\n" |
| 29404 | "ushort4 __ovld __cnfn convert_ushort4_rtp(ulong4);\n" |
| 29405 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtp(ulong4);\n" |
| 29406 | "ushort4 __ovld __cnfn convert_ushort4_rtn(ulong4);\n" |
| 29407 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtn(ulong4);\n" |
| 29408 | "ushort4 __ovld __cnfn convert_ushort4(ulong4);\n" |
| 29409 | "ushort4 __ovld __cnfn convert_ushort4_sat(ulong4);\n" |
| 29410 | "ushort4 __ovld __cnfn convert_ushort4_rte(float4);\n" |
| 29411 | "ushort4 __ovld __cnfn convert_ushort4_sat_rte(float4);\n" |
| 29412 | "ushort4 __ovld __cnfn convert_ushort4_rtz(float4);\n" |
| 29413 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtz(float4);\n" |
| 29414 | "ushort4 __ovld __cnfn convert_ushort4_rtp(float4);\n" |
| 29415 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtp(float4);\n" |
| 29416 | "ushort4 __ovld __cnfn convert_ushort4_rtn(float4);\n" |
| 29417 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtn(float4);\n" |
| 29418 | "ushort4 __ovld __cnfn convert_ushort4(float4);\n" |
| 29419 | "ushort4 __ovld __cnfn convert_ushort4_sat(float4);\n" |
| 29420 | "int4 __ovld __cnfn convert_int4_rte(char4);\n" |
| 29421 | "int4 __ovld __cnfn convert_int4_sat_rte(char4);\n" |
| 29422 | "int4 __ovld __cnfn convert_int4_rtz(char4);\n" |
| 29423 | "int4 __ovld __cnfn convert_int4_sat_rtz(char4);\n" |
| 29424 | "int4 __ovld __cnfn convert_int4_rtp(char4);\n" |
| 29425 | "int4 __ovld __cnfn convert_int4_sat_rtp(char4);\n" |
| 29426 | "int4 __ovld __cnfn convert_int4_rtn(char4);\n" |
| 29427 | "int4 __ovld __cnfn convert_int4_sat_rtn(char4);\n" |
| 29428 | "int4 __ovld __cnfn convert_int4(char4);\n" |
| 29429 | "int4 __ovld __cnfn convert_int4_sat(char4);\n" |
| 29430 | "int4 __ovld __cnfn convert_int4_rte(uchar4);\n" |
| 29431 | "int4 __ovld __cnfn convert_int4_sat_rte(uchar4);\n" |
| 29432 | "int4 __ovld __cnfn convert_int4_rtz(uchar4);\n" |
| 29433 | "int4 __ovld __cnfn convert_int4_sat_rtz(uchar4);\n" |
| 29434 | "int4 __ovld __cnfn convert_int4_rtp(uchar4);\n" |
| 29435 | "int4 __ovld __cnfn convert_int4_sat_rtp(uchar4);\n" |
| 29436 | "int4 __ovld __cnfn convert_int4_rtn(uchar4);\n" |
| 29437 | "int4 __ovld __cnfn convert_int4_sat_rtn(uchar4);\n" |
| 29438 | "int4 __ovld __cnfn convert_int4(uchar4);\n" |
| 29439 | "int4 __ovld __cnfn convert_int4_sat(uchar4);\n" |
| 29440 | "int4 __ovld __cnfn convert_int4_rte(short4);\n" |
| 29441 | "int4 __ovld __cnfn convert_int4_sat_rte(short4);\n" |
| 29442 | "int4 __ovld __cnfn convert_int4_rtz(short4);\n" |
| 29443 | "int4 __ovld __cnfn convert_int4_sat_rtz(short4);\n" |
| 29444 | "int4 __ovld __cnfn convert_int4_rtp(short4);\n" |
| 29445 | "int4 __ovld __cnfn convert_int4_sat_rtp(short4);\n" |
| 29446 | "int4 __ovld __cnfn convert_int4_rtn(short4);\n" |
| 29447 | "int4 __ovld __cnfn convert_int4_sat_rtn(short4);\n" |
| 29448 | "int4 __ovld __cnfn convert_int4(short4);\n" |
| 29449 | "int4 __ovld __cnfn convert_int4_sat(short4);\n" |
| 29450 | "int4 __ovld __cnfn convert_int4_rte(ushort4);\n" |
| 29451 | "int4 __ovld __cnfn convert_int4_sat_rte(ushort4);\n" |
| 29452 | "int4 __ovld __cnfn convert_int4_rtz(ushort4);\n" |
| 29453 | "int4 __ovld __cnfn convert_int4_sat_rtz(ushort4);\n" |
| 29454 | "int4 __ovld __cnfn convert_int4_rtp(ushort4);\n" |
| 29455 | "int4 __ovld __cnfn convert_int4_sat_rtp(ushort4);\n" |
| 29456 | "int4 __ovld __cnfn convert_int4_rtn(ushort4);\n" |
| 29457 | "int4 __ovld __cnfn convert_int4_sat_rtn(ushort4);\n" |
| 29458 | "int4 __ovld __cnfn convert_int4(ushort4);\n" |
| 29459 | "int4 __ovld __cnfn convert_int4_sat(ushort4);\n" |
| 29460 | "int4 __ovld __cnfn convert_int4_rte(int4);\n" |
| 29461 | "int4 __ovld __cnfn convert_int4_sat_rte(int4);\n" |
| 29462 | "int4 __ovld __cnfn convert_int4_rtz(int4);\n" |
| 29463 | "int4 __ovld __cnfn convert_int4_sat_rtz(int4);\n" |
| 29464 | "int4 __ovld __cnfn convert_int4_rtp(int4);\n" |
| 29465 | "int4 __ovld __cnfn convert_int4_sat_rtp(int4);\n" |
| 29466 | "int4 __ovld __cnfn convert_int4_rtn(int4);\n" |
| 29467 | "int4 __ovld __cnfn convert_int4_sat_rtn(int4);\n" |
| 29468 | "int4 __ovld __cnfn convert_int4(int4);\n" |
| 29469 | "int4 __ovld __cnfn convert_int4_sat(int4);\n" |
| 29470 | "int4 __ovld __cnfn convert_int4_rte(uint4);\n" |
| 29471 | "int4 __ovld __cnfn convert_int4_sat_rte(uint4);\n" |
| 29472 | "int4 __ovld __cnfn convert_int4_rtz(uint4);\n" |
| 29473 | "int4 __ovld __cnfn convert_int4_sat_rtz(uint4);\n" |
| 29474 | "int4 __ovld __cnfn convert_int4_rtp(uint4);\n" |
| 29475 | "int4 __ovld __cnfn convert_int4_sat_rtp(uint4);\n" |
| 29476 | "int4 __ovld __cnfn convert_int4_rtn(uint4);\n" |
| 29477 | "int4 __ovld __cnfn convert_int4_sat_rtn(uint4);\n" |
| 29478 | "int4 __ovld __cnfn convert_int4(uint4);\n" |
| 29479 | "int4 __ovld __cnfn convert_int4_sat(uint4);\n" |
| 29480 | "int4 __ovld __cnfn convert_int4_rte(long4);\n" |
| 29481 | "int4 __ovld __cnfn convert_int4_sat_rte(long4);\n" |
| 29482 | "int4 __ovld __cnfn convert_int4_rtz(long4);\n" |
| 29483 | "int4 __ovld __cnfn convert_int4_sat_rtz(long4);\n" |
| 29484 | "int4 __ovld __cnfn convert_int4_rtp(long4);\n" |
| 29485 | "int4 __ovld __cnfn convert_int4_sat_rtp(long4);\n" |
| 29486 | "int4 __ovld __cnfn convert_int4_rtn(long4);\n" |
| 29487 | "int4 __ovld __cnfn convert_int4_sat_rtn(long4);\n" |
| 29488 | "int4 __ovld __cnfn convert_int4(long4);\n" |
| 29489 | "int4 __ovld __cnfn convert_int4_sat(long4);\n" |
| 29490 | "int4 __ovld __cnfn convert_int4_rte(ulong4);\n" |
| 29491 | "int4 __ovld __cnfn convert_int4_sat_rte(ulong4);\n" |
| 29492 | "int4 __ovld __cnfn convert_int4_rtz(ulong4);\n" |
| 29493 | "int4 __ovld __cnfn convert_int4_sat_rtz(ulong4);\n" |
| 29494 | "int4 __ovld __cnfn convert_int4_rtp(ulong4);\n" |
| 29495 | "int4 __ovld __cnfn convert_int4_sat_rtp(ulong4);\n" |
| 29496 | "int4 __ovld __cnfn convert_int4_rtn(ulong4);\n" |
| 29497 | "int4 __ovld __cnfn convert_int4_sat_rtn(ulong4);\n" |
| 29498 | "int4 __ovld __cnfn convert_int4(ulong4);\n" |
| 29499 | "int4 __ovld __cnfn convert_int4_sat(ulong4);\n" |
| 29500 | "int4 __ovld __cnfn convert_int4_rte(float4);\n" |
| 29501 | "int4 __ovld __cnfn convert_int4_sat_rte(float4);\n" |
| 29502 | "int4 __ovld __cnfn convert_int4_rtz(float4);\n" |
| 29503 | "int4 __ovld __cnfn convert_int4_sat_rtz(float4);\n" |
| 29504 | "int4 __ovld __cnfn convert_int4_rtp(float4);\n" |
| 29505 | "int4 __ovld __cnfn convert_int4_sat_rtp(float4);\n" |
| 29506 | "int4 __ovld __cnfn convert_int4_rtn(float4);\n" |
| 29507 | "int4 __ovld __cnfn convert_int4_sat_rtn(float4);\n" |
| 29508 | "int4 __ovld __cnfn convert_int4(float4);\n" |
| 29509 | "int4 __ovld __cnfn convert_int4_sat(float4);\n" |
| 29510 | "uint4 __ovld __cnfn convert_uint4_rte(char4);\n" |
| 29511 | "uint4 __ovld __cnfn convert_uint4_sat_rte(char4);\n" |
| 29512 | "uint4 __ovld __cnfn convert_uint4_rtz(char4);\n" |
| 29513 | "uint4 __ovld __cnfn convert_uint4_sat_rtz(char4);\n" |
| 29514 | "uint4 __ovld __cnfn convert_uint4_rtp(char4);\n" |
| 29515 | "uint4 __ovld __cnfn convert_uint4_sat_rtp(char4);\n" |
| 29516 | "uint4 __ovld __cnfn convert_uint4_rtn(char4);\n" |
| 29517 | "uint4 __ovld __cnfn convert_uint4_sat_rtn(char4);\n" |
| 29518 | "uint4 __ovld __cnfn convert_uint4(char4);\n" |
| 29519 | "uint4 __ovld __cnfn convert_uint4_sat(char4);\n" |
| 29520 | "uint4 __ovld __cnfn convert_uint4_rte(uchar4);\n" |
| 29521 | "uint4 __ovld __cnfn convert_uint4_sat_rte(uchar4);\n" |
| 29522 | "uint4 __ovld __cnfn convert_uint4_rtz(uchar4);\n" |
| 29523 | "uint4 __ovld __cnfn convert_uint4_sat_rtz(uchar4);\n" |
| 29524 | "uint4 __ovld __cnfn convert_uint4_rtp(uchar4);\n" |
| 29525 | "uint4 __ovld __cnfn convert_uint4_sat_rtp(uchar4);\n" |
| 29526 | "uint4 __ovld __cnfn convert_uint4_rtn(uchar4);\n" |
| 29527 | "uint4 __ovld __cnfn convert_uint4_sat_rtn(uchar4);\n" |
| 29528 | "uint4 __ovld __cnfn convert_uint4(uchar4);\n" |
| 29529 | "uint4 __ovld __cnfn convert_uint4_sat(uchar4);\n" |
| 29530 | "uint4 __ovld __cnfn convert_uint4_rte(short4);\n" |
| 29531 | "uint4 __ovld __cnfn convert_uint4_sat_rte(short4);\n" |
| 29532 | "uint4 __ovld __cnfn convert_uint4_rtz(short4);\n" |
| 29533 | "uint4 __ovld __cnfn convert_uint4_sat_rtz(short4);\n" |
| 29534 | "uint4 __ovld __cnfn convert_uint4_rtp(short4);\n" |
| 29535 | "uint4 __ovld __cnfn convert_uint4_sat_rtp(short4);\n" |
| 29536 | "uint4 __ovld __cnfn convert_uint4_rtn(short4);\n" |
| 29537 | "uint4 __ovld __cnfn convert_uint4_sat_rtn(short4);\n" |
| 29538 | "uint4 __ovld __cnfn convert_uint4(short4);\n" |
| 29539 | "uint4 __ovld __cnfn convert_uint4_sat(short4);\n" |
| 29540 | "uint4 __ovld __cnfn convert_uint4_rte(ushort4);\n" |
| 29541 | "uint4 __ovld __cnfn convert_uint4_sat_rte(ushort4);\n" |
| 29542 | "uint4 __ovld __cnfn convert_uint4_rtz(ushort4);\n" |
| 29543 | "uint4 __ovld __cnfn convert_uint4_sat_rtz(ushort4);\n" |
| 29544 | "uint4 __ovld __cnfn convert_uint4_rtp(ushort4);\n" |
| 29545 | "uint4 __ovld __cnfn convert_uint4_sat_rtp(ushort4);\n" |
| 29546 | "uint4 __ovld __cnfn convert_uint4_rtn(ushort4);\n" |
| 29547 | "uint4 __ovld __cnfn convert_uint4_sat_rtn(ushort4);\n" |
| 29548 | "uint4 __ovld __cnfn convert_uint4(ushort4);\n" |
| 29549 | "uint4 __ovld __cnfn convert_uint4_sat(ushort4);\n" |
| 29550 | "uint4 __ovld __cnfn convert_uint4_rte(int4);\n" |
| 29551 | "uint4 __ovld __cnfn convert_uint4_sat_rte(int4);\n" |
| 29552 | "uint4 __ovld __cnfn convert_uint4_rtz(int4);\n" |
| 29553 | "uint4 __ovld __cnfn convert_uint4_sat_rtz(int4);\n" |
| 29554 | "uint4 __ovld __cnfn convert_uint4_rtp(int4);\n" |
| 29555 | "uint4 __ovld __cnfn convert_uint4_sat_rtp(int4);\n" |
| 29556 | "uint4 __ovld __cnfn convert_uint4_rtn(int4);\n" |
| 29557 | "uint4 __ovld __cnfn convert_uint4_sat_rtn(int4);\n" |
| 29558 | "uint4 __ovld __cnfn convert_uint4(int4);\n" |
| 29559 | "uint4 __ovld __cnfn convert_uint4_sat(int4);\n" |
| 29560 | "uint4 __ovld __cnfn convert_uint4_rte(uint4);\n" |
| 29561 | "uint4 __ovld __cnfn convert_uint4_sat_rte(uint4);\n" |
| 29562 | "uint4 __ovld __cnfn convert_uint4_rtz(uint4);\n" |
| 29563 | "uint4 __ovld __cnfn convert_uint4_sat_rtz(uint4);\n" |
| 29564 | "uint4 __ovld __cnfn convert_uint4_rtp(uint4);\n" |
| 29565 | "uint4 __ovld __cnfn convert_uint4_sat_rtp(uint4);\n" |
| 29566 | "uint4 __ovld __cnfn convert_uint4_rtn(uint4);\n" |
| 29567 | "uint4 __ovld __cnfn convert_uint4_sat_rtn(uint4);\n" |
| 29568 | "uint4 __ovld __cnfn convert_uint4(uint4);\n" |
| 29569 | "uint4 __ovld __cnfn convert_uint4_sat(uint4);\n" |
| 29570 | "uint4 __ovld __cnfn convert_uint4_rte(long4);\n" |
| 29571 | "uint4 __ovld __cnfn convert_uint4_sat_rte(long4);\n" |
| 29572 | "uint4 __ovld __cnfn convert_uint4_rtz(long4);\n" |
| 29573 | "uint4 __ovld __cnfn convert_uint4_sat_rtz(long4);\n" |
| 29574 | "uint4 __ovld __cnfn convert_uint4_rtp(long4);\n" |
| 29575 | "uint4 __ovld __cnfn convert_uint4_sat_rtp(long4);\n" |
| 29576 | "uint4 __ovld __cnfn convert_uint4_rtn(long4);\n" |
| 29577 | "uint4 __ovld __cnfn convert_uint4_sat_rtn(long4);\n" |
| 29578 | "uint4 __ovld __cnfn convert_uint4(long4);\n" |
| 29579 | "uint4 __ovld __cnfn convert_uint4_sat(long4);\n" |
| 29580 | "uint4 __ovld __cnfn convert_uint4_rte(ulong4);\n" |
| 29581 | "uint4 __ovld __cnfn convert_uint4_sat_rte(ulong4);\n" |
| 29582 | "uint4 __ovld __cnfn convert_uint4_rtz(ulong4);\n" |
| 29583 | "uint4 __ovld __cnfn convert_uint4_sat_rtz(ulong4);\n" |
| 29584 | "uint4 __ovld __cnfn convert_uint4_rtp(ulong4);\n" |
| 29585 | "uint4 __ovld __cnfn convert_uint4_sat_rtp(ulong4);\n" |
| 29586 | "uint4 __ovld __cnfn convert_uint4_rtn(ulong4);\n" |
| 29587 | "uint4 __ovld __cnfn convert_uint4_sat_rtn(ulong4);\n" |
| 29588 | "uint4 __ovld __cnfn convert_uint4(ulong4);\n" |
| 29589 | "uint4 __ovld __cnfn convert_uint4_sat(ulong4);\n" |
| 29590 | "uint4 __ovld __cnfn convert_uint4_rte(float4);\n" |
| 29591 | "uint4 __ovld __cnfn convert_uint4_sat_rte(float4);\n" |
| 29592 | "uint4 __ovld __cnfn convert_uint4_rtz(float4);\n" |
| 29593 | "uint4 __ovld __cnfn convert_uint4_sat_rtz(float4);\n" |
| 29594 | "uint4 __ovld __cnfn convert_uint4_rtp(float4);\n" |
| 29595 | "uint4 __ovld __cnfn convert_uint4_sat_rtp(float4);\n" |
| 29596 | "uint4 __ovld __cnfn convert_uint4_rtn(float4);\n" |
| 29597 | "uint4 __ovld __cnfn convert_uint4_sat_rtn(float4);\n" |
| 29598 | "uint4 __ovld __cnfn convert_uint4(float4);\n" |
| 29599 | "uint4 __ovld __cnfn convert_uint4_sat(float4);\n" |
| 29600 | "long4 __ovld __cnfn convert_long4_rte(char4);\n" |
| 29601 | "long4 __ovld __cnfn convert_long4_sat_rte(char4);\n" |
| 29602 | "long4 __ovld __cnfn convert_long4_rtz(char4);\n" |
| 29603 | "long4 __ovld __cnfn convert_long4_sat_rtz(char4);\n" |
| 29604 | "long4 __ovld __cnfn convert_long4_rtp(char4);\n" |
| 29605 | "long4 __ovld __cnfn convert_long4_sat_rtp(char4);\n" |
| 29606 | "long4 __ovld __cnfn convert_long4_rtn(char4);\n" |
| 29607 | "long4 __ovld __cnfn convert_long4_sat_rtn(char4);\n" |
| 29608 | "long4 __ovld __cnfn convert_long4(char4);\n" |
| 29609 | "long4 __ovld __cnfn convert_long4_sat(char4);\n" |
| 29610 | "long4 __ovld __cnfn convert_long4_rte(uchar4);\n" |
| 29611 | "long4 __ovld __cnfn convert_long4_sat_rte(uchar4);\n" |
| 29612 | "long4 __ovld __cnfn convert_long4_rtz(uchar4);\n" |
| 29613 | "long4 __ovld __cnfn convert_long4_sat_rtz(uchar4);\n" |
| 29614 | "long4 __ovld __cnfn convert_long4_rtp(uchar4);\n" |
| 29615 | "long4 __ovld __cnfn convert_long4_sat_rtp(uchar4);\n" |
| 29616 | "long4 __ovld __cnfn convert_long4_rtn(uchar4);\n" |
| 29617 | "long4 __ovld __cnfn convert_long4_sat_rtn(uchar4);\n" |
| 29618 | "long4 __ovld __cnfn convert_long4(uchar4);\n" |
| 29619 | "long4 __ovld __cnfn convert_long4_sat(uchar4);\n" |
| 29620 | "long4 __ovld __cnfn convert_long4_rte(short4);\n" |
| 29621 | "long4 __ovld __cnfn convert_long4_sat_rte(short4);\n" |
| 29622 | "long4 __ovld __cnfn convert_long4_rtz(short4);\n" |
| 29623 | "long4 __ovld __cnfn convert_long4_sat_rtz(short4);\n" |
| 29624 | "long4 __ovld __cnfn convert_long4_rtp(short4);\n" |
| 29625 | "long4 __ovld __cnfn convert_long4_sat_rtp(short4);\n" |
| 29626 | "long4 __ovld __cnfn convert_long4_rtn(short4);\n" |
| 29627 | "long4 __ovld __cnfn convert_long4_sat_rtn(short4);\n" |
| 29628 | "long4 __ovld __cnfn convert_long4(short4);\n" |
| 29629 | "long4 __ovld __cnfn convert_long4_sat(short4);\n" |
| 29630 | "long4 __ovld __cnfn convert_long4_rte(ushort4);\n" |
| 29631 | "long4 __ovld __cnfn convert_long4_sat_rte(ushort4);\n" |
| 29632 | "long4 __ovld __cnfn convert_long4_rtz(ushort4);\n" |
| 29633 | "long4 __ovld __cnfn convert_long4_sat_rtz(ushort4);\n" |
| 29634 | "long4 __ovld __cnfn convert_long4_rtp(ushort4);\n" |
| 29635 | "long4 __ovld __cnfn convert_long4_sat_rtp(ushort4);\n" |
| 29636 | "long4 __ovld __cnfn convert_long4_rtn(ushort4);\n" |
| 29637 | "long4 __ovld __cnfn convert_long4_sat_rtn(ushort4);\n" |
| 29638 | "long4 __ovld __cnfn convert_long4(ushort4);\n" |
| 29639 | "long4 __ovld __cnfn convert_long4_sat(ushort4);\n" |
| 29640 | "long4 __ovld __cnfn convert_long4_rte(int4);\n" |
| 29641 | "long4 __ovld __cnfn convert_long4_sat_rte(int4);\n" |
| 29642 | "long4 __ovld __cnfn convert_long4_rtz(int4);\n" |
| 29643 | "long4 __ovld __cnfn convert_long4_sat_rtz(int4);\n" |
| 29644 | "long4 __ovld __cnfn convert_long4_rtp(int4);\n" |
| 29645 | "long4 __ovld __cnfn convert_long4_sat_rtp(int4);\n" |
| 29646 | "long4 __ovld __cnfn convert_long4_rtn(int4);\n" |
| 29647 | "long4 __ovld __cnfn convert_long4_sat_rtn(int4);\n" |
| 29648 | "long4 __ovld __cnfn convert_long4(int4);\n" |
| 29649 | "long4 __ovld __cnfn convert_long4_sat(int4);\n" |
| 29650 | "long4 __ovld __cnfn convert_long4_rte(uint4);\n" |
| 29651 | "long4 __ovld __cnfn convert_long4_sat_rte(uint4);\n" |
| 29652 | "long4 __ovld __cnfn convert_long4_rtz(uint4);\n" |
| 29653 | "long4 __ovld __cnfn convert_long4_sat_rtz(uint4);\n" |
| 29654 | "long4 __ovld __cnfn convert_long4_rtp(uint4);\n" |
| 29655 | "long4 __ovld __cnfn convert_long4_sat_rtp(uint4);\n" |
| 29656 | "long4 __ovld __cnfn convert_long4_rtn(uint4);\n" |
| 29657 | "long4 __ovld __cnfn convert_long4_sat_rtn(uint4);\n" |
| 29658 | "long4 __ovld __cnfn convert_long4(uint4);\n" |
| 29659 | "long4 __ovld __cnfn convert_long4_sat(uint4);\n" |
| 29660 | "long4 __ovld __cnfn convert_long4_rte(long4);\n" |
| 29661 | "long4 __ovld __cnfn convert_long4_sat_rte(long4);\n" |
| 29662 | "long4 __ovld __cnfn convert_long4_rtz(long4);\n" |
| 29663 | "long4 __ovld __cnfn convert_long4_sat_rtz(long4);\n" |
| 29664 | "long4 __ovld __cnfn convert_long4_rtp(long4);\n" |
| 29665 | "long4 __ovld __cnfn convert_long4_sat_rtp(long4);\n" |
| 29666 | "long4 __ovld __cnfn convert_long4_rtn(long4);\n" |
| 29667 | "long4 __ovld __cnfn convert_long4_sat_rtn(long4);\n" |
| 29668 | "long4 __ovld __cnfn convert_long4(long4);\n" |
| 29669 | "long4 __ovld __cnfn convert_long4_sat(long4);\n" |
| 29670 | "long4 __ovld __cnfn convert_long4_rte(ulong4);\n" |
| 29671 | "long4 __ovld __cnfn convert_long4_sat_rte(ulong4);\n" |
| 29672 | "long4 __ovld __cnfn convert_long4_rtz(ulong4);\n" |
| 29673 | "long4 __ovld __cnfn convert_long4_sat_rtz(ulong4);\n" |
| 29674 | "long4 __ovld __cnfn convert_long4_rtp(ulong4);\n" |
| 29675 | "long4 __ovld __cnfn convert_long4_sat_rtp(ulong4);\n" |
| 29676 | "long4 __ovld __cnfn convert_long4_rtn(ulong4);\n" |
| 29677 | "long4 __ovld __cnfn convert_long4_sat_rtn(ulong4);\n" |
| 29678 | "long4 __ovld __cnfn convert_long4(ulong4);\n" |
| 29679 | "long4 __ovld __cnfn convert_long4_sat(ulong4);\n" |
| 29680 | "long4 __ovld __cnfn convert_long4_rte(float4);\n" |
| 29681 | "long4 __ovld __cnfn convert_long4_sat_rte(float4);\n" |
| 29682 | "long4 __ovld __cnfn convert_long4_rtz(float4);\n" |
| 29683 | "long4 __ovld __cnfn convert_long4_sat_rtz(float4);\n" |
| 29684 | "long4 __ovld __cnfn convert_long4_rtp(float4);\n" |
| 29685 | "long4 __ovld __cnfn convert_long4_sat_rtp(float4);\n" |
| 29686 | "long4 __ovld __cnfn convert_long4_rtn(float4);\n" |
| 29687 | "long4 __ovld __cnfn convert_long4_sat_rtn(float4);\n" |
| 29688 | "long4 __ovld __cnfn convert_long4(float4);\n" |
| 29689 | "long4 __ovld __cnfn convert_long4_sat(float4);\n" |
| 29690 | "ulong4 __ovld __cnfn convert_ulong4_rte(char4);\n" |
| 29691 | "ulong4 __ovld __cnfn convert_ulong4_sat_rte(char4);\n" |
| 29692 | "ulong4 __ovld __cnfn convert_ulong4_rtz(char4);\n" |
| 29693 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtz(char4);\n" |
| 29694 | "ulong4 __ovld __cnfn convert_ulong4_rtp(char4);\n" |
| 29695 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtp(char4);\n" |
| 29696 | "ulong4 __ovld __cnfn convert_ulong4_rtn(char4);\n" |
| 29697 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtn(char4);\n" |
| 29698 | "ulong4 __ovld __cnfn convert_ulong4(char4);\n" |
| 29699 | "ulong4 __ovld __cnfn convert_ulong4_sat(char4);\n" |
| 29700 | "ulong4 __ovld __cnfn convert_ulong4_rte(uchar4);\n" |
| 29701 | "ulong4 __ovld __cnfn convert_ulong4_sat_rte(uchar4);\n" |
| 29702 | "ulong4 __ovld __cnfn convert_ulong4_rtz(uchar4);\n" |
| 29703 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtz(uchar4);\n" |
| 29704 | "ulong4 __ovld __cnfn convert_ulong4_rtp(uchar4);\n" |
| 29705 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtp(uchar4);\n" |
| 29706 | "ulong4 __ovld __cnfn convert_ulong4_rtn(uchar4);\n" |
| 29707 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtn(uchar4);\n" |
| 29708 | "ulong4 __ovld __cnfn convert_ulong4(uchar4);\n" |
| 29709 | "ulong4 __ovld __cnfn convert_ulong4_sat(uchar4);\n" |
| 29710 | "ulong4 __ovld __cnfn convert_ulong4_rte(short4);\n" |
| 29711 | "ulong4 __ovld __cnfn convert_ulong4_sat_rte(short4);\n" |
| 29712 | "ulong4 __ovld __cnfn convert_ulong4_rtz(short4);\n" |
| 29713 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtz(short4);\n" |
| 29714 | "ulong4 __ovld __cnfn convert_ulong4_rtp(short4);\n" |
| 29715 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtp(short4);\n" |
| 29716 | "ulong4 __ovld __cnfn convert_ulong4_rtn(short4);\n" |
| 29717 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtn(short4);\n" |
| 29718 | "ulong4 __ovld __cnfn convert_ulong4(short4);\n" |
| 29719 | "ulong4 __ovld __cnfn convert_ulong4_sat(short4);\n" |
| 29720 | "ulong4 __ovld __cnfn convert_ulong4_rte(ushort4);\n" |
| 29721 | "ulong4 __ovld __cnfn convert_ulong4_sat_rte(ushort4);\n" |
| 29722 | "ulong4 __ovld __cnfn convert_ulong4_rtz(ushort4);\n" |
| 29723 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtz(ushort4);\n" |
| 29724 | "ulong4 __ovld __cnfn convert_ulong4_rtp(ushort4);\n" |
| 29725 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtp(ushort4);\n" |
| 29726 | "ulong4 __ovld __cnfn convert_ulong4_rtn(ushort4);\n" |
| 29727 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtn(ushort4);\n" |
| 29728 | "ulong4 __ovld __cnfn convert_ulong4(ushort4);\n" |
| 29729 | "ulong4 __ovld __cnfn convert_ulong4_sat(ushort4);\n" |
| 29730 | "ulong4 __ovld __cnfn convert_ulong4_rte(int4);\n" |
| 29731 | "ulong4 __ovld __cnfn convert_ulong4_sat_rte(int4);\n" |
| 29732 | "ulong4 __ovld __cnfn convert_ulong4_rtz(int4);\n" |
| 29733 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtz(int4);\n" |
| 29734 | "ulong4 __ovld __cnfn convert_ulong4_rtp(int4);\n" |
| 29735 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtp(int4);\n" |
| 29736 | "ulong4 __ovld __cnfn convert_ulong4_rtn(int4);\n" |
| 29737 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtn(int4);\n" |
| 29738 | "ulong4 __ovld __cnfn convert_ulong4(int4);\n" |
| 29739 | "ulong4 __ovld __cnfn convert_ulong4_sat(int4);\n" |
| 29740 | "ulong4 __ovld __cnfn convert_ulong4_rte(uint4);\n" |
| 29741 | "ulong4 __ovld __cnfn convert_ulong4_sat_rte(uint4);\n" |
| 29742 | "ulong4 __ovld __cnfn convert_ulong4_rtz(uint4);\n" |
| 29743 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtz(uint4);\n" |
| 29744 | "ulong4 __ovld __cnfn convert_ulong4_rtp(uint4);\n" |
| 29745 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtp(uint4);\n" |
| 29746 | "ulong4 __ovld __cnfn convert_ulong4_rtn(uint4);\n" |
| 29747 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtn(uint4);\n" |
| 29748 | "ulong4 __ovld __cnfn convert_ulong4(uint4);\n" |
| 29749 | "ulong4 __ovld __cnfn convert_ulong4_sat(uint4);\n" |
| 29750 | "ulong4 __ovld __cnfn convert_ulong4_rte(long4);\n" |
| 29751 | "ulong4 __ovld __cnfn convert_ulong4_sat_rte(long4);\n" |
| 29752 | "ulong4 __ovld __cnfn convert_ulong4_rtz(long4);\n" |
| 29753 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtz(long4);\n" |
| 29754 | "ulong4 __ovld __cnfn convert_ulong4_rtp(long4);\n" |
| 29755 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtp(long4);\n" |
| 29756 | "ulong4 __ovld __cnfn convert_ulong4_rtn(long4);\n" |
| 29757 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtn(long4);\n" |
| 29758 | "ulong4 __ovld __cnfn convert_ulong4(long4);\n" |
| 29759 | "ulong4 __ovld __cnfn convert_ulong4_sat(long4);\n" |
| 29760 | "ulong4 __ovld __cnfn convert_ulong4_rte(ulong4);\n" |
| 29761 | "ulong4 __ovld __cnfn convert_ulong4_sat_rte(ulong4);\n" |
| 29762 | "ulong4 __ovld __cnfn convert_ulong4_rtz(ulong4);\n" |
| 29763 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtz(ulong4);\n" |
| 29764 | "ulong4 __ovld __cnfn convert_ulong4_rtp(ulong4);\n" |
| 29765 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtp(ulong4);\n" |
| 29766 | "ulong4 __ovld __cnfn convert_ulong4_rtn(ulong4);\n" |
| 29767 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtn(ulong4);\n" |
| 29768 | "ulong4 __ovld __cnfn convert_ulong4(ulong4);\n" |
| 29769 | "ulong4 __ovld __cnfn convert_ulong4_sat(ulong4);\n" |
| 29770 | "ulong4 __ovld __cnfn convert_ulong4_rte(float4);\n" |
| 29771 | "ulong4 __ovld __cnfn convert_ulong4_sat_rte(float4);\n" |
| 29772 | "ulong4 __ovld __cnfn convert_ulong4_rtz(float4);\n" |
| 29773 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtz(float4);\n" |
| 29774 | "ulong4 __ovld __cnfn convert_ulong4_rtp(float4);\n" |
| 29775 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtp(float4);\n" |
| 29776 | "ulong4 __ovld __cnfn convert_ulong4_rtn(float4);\n" |
| 29777 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtn(float4);\n" |
| 29778 | "ulong4 __ovld __cnfn convert_ulong4(float4);\n" |
| 29779 | "ulong4 __ovld __cnfn convert_ulong4_sat(float4);\n" |
| 29780 | "float4 __ovld __cnfn convert_float4_rte(char4);\n" |
| 29781 | "float4 __ovld __cnfn convert_float4_rtz(char4);\n" |
| 29782 | "float4 __ovld __cnfn convert_float4_rtp(char4);\n" |
| 29783 | "float4 __ovld __cnfn convert_float4_rtn(char4);\n" |
| 29784 | "float4 __ovld __cnfn convert_float4(char4);\n" |
| 29785 | "float4 __ovld __cnfn convert_float4_rte(uchar4);\n" |
| 29786 | "float4 __ovld __cnfn convert_float4_rtz(uchar4);\n" |
| 29787 | "float4 __ovld __cnfn convert_float4_rtp(uchar4);\n" |
| 29788 | "float4 __ovld __cnfn convert_float4_rtn(uchar4);\n" |
| 29789 | "float4 __ovld __cnfn convert_float4(uchar4);\n" |
| 29790 | "float4 __ovld __cnfn convert_float4_rte(short4);\n" |
| 29791 | "float4 __ovld __cnfn convert_float4_rtz(short4);\n" |
| 29792 | "float4 __ovld __cnfn convert_float4_rtp(short4);\n" |
| 29793 | "float4 __ovld __cnfn convert_float4_rtn(short4);\n" |
| 29794 | "float4 __ovld __cnfn convert_float4(short4);\n" |
| 29795 | "float4 __ovld __cnfn convert_float4_rte(ushort4);\n" |
| 29796 | "float4 __ovld __cnfn convert_float4_rtz(ushort4);\n" |
| 29797 | "float4 __ovld __cnfn convert_float4_rtp(ushort4);\n" |
| 29798 | "float4 __ovld __cnfn convert_float4_rtn(ushort4);\n" |
| 29799 | "float4 __ovld __cnfn convert_float4(ushort4);\n" |
| 29800 | "float4 __ovld __cnfn convert_float4_rte(int4);\n" |
| 29801 | "float4 __ovld __cnfn convert_float4_rtz(int4);\n" |
| 29802 | "float4 __ovld __cnfn convert_float4_rtp(int4);\n" |
| 29803 | "float4 __ovld __cnfn convert_float4_rtn(int4);\n" |
| 29804 | "float4 __ovld __cnfn convert_float4(int4);\n" |
| 29805 | "float4 __ovld __cnfn convert_float4_rte(uint4);\n" |
| 29806 | "float4 __ovld __cnfn convert_float4_rtz(uint4);\n" |
| 29807 | "float4 __ovld __cnfn convert_float4_rtp(uint4);\n" |
| 29808 | "float4 __ovld __cnfn convert_float4_rtn(uint4);\n" |
| 29809 | "float4 __ovld __cnfn convert_float4(uint4);\n" |
| 29810 | "float4 __ovld __cnfn convert_float4_rte(long4);\n" |
| 29811 | "float4 __ovld __cnfn convert_float4_rtz(long4);\n" |
| 29812 | "float4 __ovld __cnfn convert_float4_rtp(long4);\n" |
| 29813 | "float4 __ovld __cnfn convert_float4_rtn(long4);\n" |
| 29814 | "float4 __ovld __cnfn convert_float4(long4);\n" |
| 29815 | "float4 __ovld __cnfn convert_float4_rte(ulong4);\n" |
| 29816 | "float4 __ovld __cnfn convert_float4_rtz(ulong4);\n" |
| 29817 | "float4 __ovld __cnfn convert_float4_rtp(ulong4);\n" |
| 29818 | "float4 __ovld __cnfn convert_float4_rtn(ulong4);\n" |
| 29819 | "float4 __ovld __cnfn convert_float4(ulong4);\n" |
| 29820 | "float4 __ovld __cnfn convert_float4_rte(float4);\n" |
| 29821 | "float4 __ovld __cnfn convert_float4_rtz(float4);\n" |
| 29822 | "float4 __ovld __cnfn convert_float4_rtp(float4);\n" |
| 29823 | "float4 __ovld __cnfn convert_float4_rtn(float4);\n" |
| 29824 | "float4 __ovld __cnfn convert_float4(float4);\n" |
| 29825 | "char8 __ovld __cnfn convert_char8_rte(char8);\n" |
| 29826 | "char8 __ovld __cnfn convert_char8_sat_rte(char8);\n" |
| 29827 | "char8 __ovld __cnfn convert_char8_rtz(char8);\n" |
| 29828 | "char8 __ovld __cnfn convert_char8_sat_rtz(char8);\n" |
| 29829 | "char8 __ovld __cnfn convert_char8_rtp(char8);\n" |
| 29830 | "char8 __ovld __cnfn convert_char8_sat_rtp(char8);\n" |
| 29831 | "char8 __ovld __cnfn convert_char8_rtn(char8);\n" |
| 29832 | "char8 __ovld __cnfn convert_char8_sat_rtn(char8);\n" |
| 29833 | "char8 __ovld __cnfn convert_char8(char8);\n" |
| 29834 | "char8 __ovld __cnfn convert_char8_sat(char8);\n" |
| 29835 | "char8 __ovld __cnfn convert_char8_rte(uchar8);\n" |
| 29836 | "char8 __ovld __cnfn convert_char8_sat_rte(uchar8);\n" |
| 29837 | "char8 __ovld __cnfn convert_char8_rtz(uchar8);\n" |
| 29838 | "char8 __ovld __cnfn convert_char8_sat_rtz(uchar8);\n" |
| 29839 | "char8 __ovld __cnfn convert_char8_rtp(uchar8);\n" |
| 29840 | "char8 __ovld __cnfn convert_char8_sat_rtp(uchar8);\n" |
| 29841 | "char8 __ovld __cnfn convert_char8_rtn(uchar8);\n" |
| 29842 | "char8 __ovld __cnfn convert_char8_sat_rtn(uchar8);\n" |
| 29843 | "char8 __ovld __cnfn convert_char8(uchar8);\n" |
| 29844 | "char8 __ovld __cnfn convert_char8_sat(uchar8);\n" |
| 29845 | "char8 __ovld __cnfn convert_char8_rte(short8);\n" |
| 29846 | "char8 __ovld __cnfn convert_char8_sat_rte(short8);\n" |
| 29847 | "char8 __ovld __cnfn convert_char8_rtz(short8);\n" |
| 29848 | "char8 __ovld __cnfn convert_char8_sat_rtz(short8);\n" |
| 29849 | "char8 __ovld __cnfn convert_char8_rtp(short8);\n" |
| 29850 | "char8 __ovld __cnfn convert_char8_sat_rtp(short8);\n" |
| 29851 | "char8 __ovld __cnfn convert_char8_rtn(short8);\n" |
| 29852 | "char8 __ovld __cnfn convert_char8_sat_rtn(short8);\n" |
| 29853 | "char8 __ovld __cnfn convert_char8(short8);\n" |
| 29854 | "char8 __ovld __cnfn convert_char8_sat(short8);\n" |
| 29855 | "char8 __ovld __cnfn convert_char8_rte(ushort8);\n" |
| 29856 | "char8 __ovld __cnfn convert_char8_sat_rte(ushort8);\n" |
| 29857 | "char8 __ovld __cnfn convert_char8_rtz(ushort8);\n" |
| 29858 | "char8 __ovld __cnfn convert_char8_sat_rtz(ushort8);\n" |
| 29859 | "char8 __ovld __cnfn convert_char8_rtp(ushort8);\n" |
| 29860 | "char8 __ovld __cnfn convert_char8_sat_rtp(ushort8);\n" |
| 29861 | "char8 __ovld __cnfn convert_char8_rtn(ushort8);\n" |
| 29862 | "char8 __ovld __cnfn convert_char8_sat_rtn(ushort8);\n" |
| 29863 | "char8 __ovld __cnfn convert_char8(ushort8);\n" |
| 29864 | "char8 __ovld __cnfn convert_char8_sat(ushort8);\n" |
| 29865 | "char8 __ovld __cnfn convert_char8_rte(int8);\n" |
| 29866 | "char8 __ovld __cnfn convert_char8_sat_rte(int8);\n" |
| 29867 | "char8 __ovld __cnfn convert_char8_rtz(int8);\n" |
| 29868 | "char8 __ovld __cnfn convert_char8_sat_rtz(int8);\n" |
| 29869 | "char8 __ovld __cnfn convert_char8_rtp(int8);\n" |
| 29870 | "char8 __ovld __cnfn convert_char8_sat_rtp(int8);\n" |
| 29871 | "char8 __ovld __cnfn convert_char8_rtn(int8);\n" |
| 29872 | "char8 __ovld __cnfn convert_char8_sat_rtn(int8);\n" |
| 29873 | "char8 __ovld __cnfn convert_char8(int8);\n" |
| 29874 | "char8 __ovld __cnfn convert_char8_sat(int8);\n" |
| 29875 | "char8 __ovld __cnfn convert_char8_rte(uint8);\n" |
| 29876 | "char8 __ovld __cnfn convert_char8_sat_rte(uint8);\n" |
| 29877 | "char8 __ovld __cnfn convert_char8_rtz(uint8);\n" |
| 29878 | "char8 __ovld __cnfn convert_char8_sat_rtz(uint8);\n" |
| 29879 | "char8 __ovld __cnfn convert_char8_rtp(uint8);\n" |
| 29880 | "char8 __ovld __cnfn convert_char8_sat_rtp(uint8);\n" |
| 29881 | "char8 __ovld __cnfn convert_char8_rtn(uint8);\n" |
| 29882 | "char8 __ovld __cnfn convert_char8_sat_rtn(uint8);\n" |
| 29883 | "char8 __ovld __cnfn convert_char8(uint8);\n" |
| 29884 | "char8 __ovld __cnfn convert_char8_sat(uint8);\n" |
| 29885 | "char8 __ovld __cnfn convert_char8_rte(long8);\n" |
| 29886 | "char8 __ovld __cnfn convert_char8_sat_rte(long8);\n" |
| 29887 | "char8 __ovld __cnfn convert_char8_rtz(long8);\n" |
| 29888 | "char8 __ovld __cnfn convert_char8_sat_rtz(long8);\n" |
| 29889 | "char8 __ovld __cnfn convert_char8_rtp(long8);\n" |
| 29890 | "char8 __ovld __cnfn convert_char8_sat_rtp(long8);\n" |
| 29891 | "char8 __ovld __cnfn convert_char8_rtn(long8);\n" |
| 29892 | "char8 __ovld __cnfn convert_char8_sat_rtn(long8);\n" |
| 29893 | "char8 __ovld __cnfn convert_char8(long8);\n" |
| 29894 | "char8 __ovld __cnfn convert_char8_sat(long8);\n" |
| 29895 | "char8 __ovld __cnfn convert_char8_rte(ulong8);\n" |
| 29896 | "char8 __ovld __cnfn convert_char8_sat_rte(ulong8);\n" |
| 29897 | "char8 __ovld __cnfn convert_char8_rtz(ulong8);\n" |
| 29898 | "char8 __ovld __cnfn convert_char8_sat_rtz(ulong8);\n" |
| 29899 | "char8 __ovld __cnfn convert_char8_rtp(ulong8);\n" |
| 29900 | "char8 __ovld __cnfn convert_char8_sat_rtp(ulong8);\n" |
| 29901 | "char8 __ovld __cnfn convert_char8_rtn(ulong8);\n" |
| 29902 | "char8 __ovld __cnfn convert_char8_sat_rtn(ulong8);\n" |
| 29903 | "char8 __ovld __cnfn convert_char8(ulong8);\n" |
| 29904 | "char8 __ovld __cnfn convert_char8_sat(ulong8);\n" |
| 29905 | "char8 __ovld __cnfn convert_char8_rte(float8);\n" |
| 29906 | "char8 __ovld __cnfn convert_char8_sat_rte(float8);\n" |
| 29907 | "char8 __ovld __cnfn convert_char8_rtz(float8);\n" |
| 29908 | "char8 __ovld __cnfn convert_char8_sat_rtz(float8);\n" |
| 29909 | "char8 __ovld __cnfn convert_char8_rtp(float8);\n" |
| 29910 | "char8 __ovld __cnfn convert_char8_sat_rtp(float8);\n" |
| 29911 | "char8 __ovld __cnfn convert_char8_rtn(float8);\n" |
| 29912 | "char8 __ovld __cnfn convert_char8_sat_rtn(float8);\n" |
| 29913 | "char8 __ovld __cnfn convert_char8(float8);\n" |
| 29914 | "char8 __ovld __cnfn convert_char8_sat(float8);\n" |
| 29915 | "uchar8 __ovld __cnfn convert_uchar8_rte(char8);\n" |
| 29916 | "uchar8 __ovld __cnfn convert_uchar8_sat_rte(char8);\n" |
| 29917 | "uchar8 __ovld __cnfn convert_uchar8_rtz(char8);\n" |
| 29918 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtz(char8);\n" |
| 29919 | "uchar8 __ovld __cnfn convert_uchar8_rtp(char8);\n" |
| 29920 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtp(char8);\n" |
| 29921 | "uchar8 __ovld __cnfn convert_uchar8_rtn(char8);\n" |
| 29922 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtn(char8);\n" |
| 29923 | "uchar8 __ovld __cnfn convert_uchar8(char8);\n" |
| 29924 | "uchar8 __ovld __cnfn convert_uchar8_sat(char8);\n" |
| 29925 | "uchar8 __ovld __cnfn convert_uchar8_rte(uchar8);\n" |
| 29926 | "uchar8 __ovld __cnfn convert_uchar8_sat_rte(uchar8);\n" |
| 29927 | "uchar8 __ovld __cnfn convert_uchar8_rtz(uchar8);\n" |
| 29928 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtz(uchar8);\n" |
| 29929 | "uchar8 __ovld __cnfn convert_uchar8_rtp(uchar8);\n" |
| 29930 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtp(uchar8);\n" |
| 29931 | "uchar8 __ovld __cnfn convert_uchar8_rtn(uchar8);\n" |
| 29932 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtn(uchar8);\n" |
| 29933 | "uchar8 __ovld __cnfn convert_uchar8(uchar8);\n" |
| 29934 | "uchar8 __ovld __cnfn convert_uchar8_sat(uchar8);\n" |
| 29935 | "uchar8 __ovld __cnfn convert_uchar8_rte(short8);\n" |
| 29936 | "uchar8 __ovld __cnfn convert_uchar8_sat_rte(short8);\n" |
| 29937 | "uchar8 __ovld __cnfn convert_uchar8_rtz(short8);\n" |
| 29938 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtz(short8);\n" |
| 29939 | "uchar8 __ovld __cnfn convert_uchar8_rtp(short8);\n" |
| 29940 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtp(short8);\n" |
| 29941 | "uchar8 __ovld __cnfn convert_uchar8_rtn(short8);\n" |
| 29942 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtn(short8);\n" |
| 29943 | "uchar8 __ovld __cnfn convert_uchar8(short8);\n" |
| 29944 | "uchar8 __ovld __cnfn convert_uchar8_sat(short8);\n" |
| 29945 | "uchar8 __ovld __cnfn convert_uchar8_rte(ushort8);\n" |
| 29946 | "uchar8 __ovld __cnfn convert_uchar8_sat_rte(ushort8);\n" |
| 29947 | "uchar8 __ovld __cnfn convert_uchar8_rtz(ushort8);\n" |
| 29948 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtz(ushort8);\n" |
| 29949 | "uchar8 __ovld __cnfn convert_uchar8_rtp(ushort8);\n" |
| 29950 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtp(ushort8);\n" |
| 29951 | "uchar8 __ovld __cnfn convert_uchar8_rtn(ushort8);\n" |
| 29952 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtn(ushort8);\n" |
| 29953 | "uchar8 __ovld __cnfn convert_uchar8(ushort8);\n" |
| 29954 | "uchar8 __ovld __cnfn convert_uchar8_sat(ushort8);\n" |
| 29955 | "uchar8 __ovld __cnfn convert_uchar8_rte(int8);\n" |
| 29956 | "uchar8 __ovld __cnfn convert_uchar8_sat_rte(int8);\n" |
| 29957 | "uchar8 __ovld __cnfn convert_uchar8_rtz(int8);\n" |
| 29958 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtz(int8);\n" |
| 29959 | "uchar8 __ovld __cnfn convert_uchar8_rtp(int8);\n" |
| 29960 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtp(int8);\n" |
| 29961 | "uchar8 __ovld __cnfn convert_uchar8_rtn(int8);\n" |
| 29962 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtn(int8);\n" |
| 29963 | "uchar8 __ovld __cnfn convert_uchar8(int8);\n" |
| 29964 | "uchar8 __ovld __cnfn convert_uchar8_sat(int8);\n" |
| 29965 | "uchar8 __ovld __cnfn convert_uchar8_rte(uint8);\n" |
| 29966 | "uchar8 __ovld __cnfn convert_uchar8_sat_rte(uint8);\n" |
| 29967 | "uchar8 __ovld __cnfn convert_uchar8_rtz(uint8);\n" |
| 29968 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtz(uint8);\n" |
| 29969 | "uchar8 __ovld __cnfn convert_uchar8_rtp(uint8);\n" |
| 29970 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtp(uint8);\n" |
| 29971 | "uchar8 __ovld __cnfn convert_uchar8_rtn(uint8);\n" |
| 29972 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtn(uint8);\n" |
| 29973 | "uchar8 __ovld __cnfn convert_uchar8(uint8);\n" |
| 29974 | "uchar8 __ovld __cnfn convert_uchar8_sat(uint8);\n" |
| 29975 | "uchar8 __ovld __cnfn convert_uchar8_rte(long8);\n" |
| 29976 | "uchar8 __ovld __cnfn convert_uchar8_sat_rte(long8);\n" |
| 29977 | "uchar8 __ovld __cnfn convert_uchar8_rtz(long8);\n" |
| 29978 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtz(long8);\n" |
| 29979 | "uchar8 __ovld __cnfn convert_uchar8_rtp(long8);\n" |
| 29980 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtp(long8);\n" |
| 29981 | "uchar8 __ovld __cnfn convert_uchar8_rtn(long8);\n" |
| 29982 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtn(long8);\n" |
| 29983 | "uchar8 __ovld __cnfn convert_uchar8(long8);\n" |
| 29984 | "uchar8 __ovld __cnfn convert_uchar8_sat(long8);\n" |
| 29985 | "uchar8 __ovld __cnfn convert_uchar8_rte(ulong8);\n" |
| 29986 | "uchar8 __ovld __cnfn convert_uchar8_sat_rte(ulong8);\n" |
| 29987 | "uchar8 __ovld __cnfn convert_uchar8_rtz(ulong8);\n" |
| 29988 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtz(ulong8);\n" |
| 29989 | "uchar8 __ovld __cnfn convert_uchar8_rtp(ulong8);\n" |
| 29990 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtp(ulong8);\n" |
| 29991 | "uchar8 __ovld __cnfn convert_uchar8_rtn(ulong8);\n" |
| 29992 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtn(ulong8);\n" |
| 29993 | "uchar8 __ovld __cnfn convert_uchar8(ulong8);\n" |
| 29994 | "uchar8 __ovld __cnfn convert_uchar8_sat(ulong8);\n" |
| 29995 | "uchar8 __ovld __cnfn convert_uchar8_rte(float8);\n" |
| 29996 | "uchar8 __ovld __cnfn convert_uchar8_sat_rte(float8);\n" |
| 29997 | "uchar8 __ovld __cnfn convert_uchar8_rtz(float8);\n" |
| 29998 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtz(float8);\n" |
| 29999 | "uchar8 __ovld __cnfn convert_uchar8_rtp(float8);\n" |
| 30000 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtp(float8);\n" |
| 30001 | "uchar8 __ovld __cnfn convert_uchar8_rtn(float8);\n" |
| 30002 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtn(float8);\n" |
| 30003 | "uchar8 __ovld __cnfn convert_uchar8(float8);\n" |
| 30004 | "uchar8 __ovld __cnfn convert_uchar8_sat(float8);\n" |
| 30005 | "short8 __ovld __cnfn convert_short8_rte(char8);\n" |
| 30006 | "short8 __ovld __cnfn convert_short8_sat_rte(char8);\n" |
| 30007 | "short8 __ovld __cnfn convert_short8_rtz(char8);\n" |
| 30008 | "short8 __ovld __cnfn convert_short8_sat_rtz(char8);\n" |
| 30009 | "short8 __ovld __cnfn convert_short8_rtp(char8);\n" |
| 30010 | "short8 __ovld __cnfn convert_short8_sat_rtp(char8);\n" |
| 30011 | "short8 __ovld __cnfn convert_short8_rtn(char8);\n" |
| 30012 | "short8 __ovld __cnfn convert_short8_sat_rtn(char8);\n" |
| 30013 | "short8 __ovld __cnfn convert_short8(char8);\n" |
| 30014 | "short8 __ovld __cnfn convert_short8_sat(char8);\n" |
| 30015 | "short8 __ovld __cnfn convert_short8_rte(uchar8);\n" |
| 30016 | "short8 __ovld __cnfn convert_short8_sat_rte(uchar8);\n" |
| 30017 | "short8 __ovld __cnfn convert_short8_rtz(uchar8);\n" |
| 30018 | "short8 __ovld __cnfn convert_short8_sat_rtz(uchar8);\n" |
| 30019 | "short8 __ovld __cnfn convert_short8_rtp(uchar8);\n" |
| 30020 | "short8 __ovld __cnfn convert_short8_sat_rtp(uchar8);\n" |
| 30021 | "short8 __ovld __cnfn convert_short8_rtn(uchar8);\n" |
| 30022 | "short8 __ovld __cnfn convert_short8_sat_rtn(uchar8);\n" |
| 30023 | "short8 __ovld __cnfn convert_short8(uchar8);\n" |
| 30024 | "short8 __ovld __cnfn convert_short8_sat(uchar8);\n" |
| 30025 | "short8 __ovld __cnfn convert_short8_rte(short8);\n" |
| 30026 | "short8 __ovld __cnfn convert_short8_sat_rte(short8);\n" |
| 30027 | "short8 __ovld __cnfn convert_short8_rtz(short8);\n" |
| 30028 | "short8 __ovld __cnfn convert_short8_sat_rtz(short8);\n" |
| 30029 | "short8 __ovld __cnfn convert_short8_rtp(short8);\n" |
| 30030 | "short8 __ovld __cnfn convert_short8_sat_rtp(short8);\n" |
| 30031 | "short8 __ovld __cnfn convert_short8_rtn(short8);\n" |
| 30032 | "short8 __ovld __cnfn convert_short8_sat_rtn(short8);\n" |
| 30033 | "short8 __ovld __cnfn convert_short8(short8);\n" |
| 30034 | "short8 __ovld __cnfn convert_short8_sat(short8);\n" |
| 30035 | "short8 __ovld __cnfn convert_short8_rte(ushort8);\n" |
| 30036 | "short8 __ovld __cnfn convert_short8_sat_rte(ushort8);\n" |
| 30037 | "short8 __ovld __cnfn convert_short8_rtz(ushort8);\n" |
| 30038 | "short8 __ovld __cnfn convert_short8_sat_rtz(ushort8);\n" |
| 30039 | "short8 __ovld __cnfn convert_short8_rtp(ushort8);\n" |
| 30040 | "short8 __ovld __cnfn convert_short8_sat_rtp(ushort8);\n" |
| 30041 | "short8 __ovld __cnfn convert_short8_rtn(ushort8);\n" |
| 30042 | "short8 __ovld __cnfn convert_short8_sat_rtn(ushort8);\n" |
| 30043 | "short8 __ovld __cnfn convert_short8(ushort8);\n" |
| 30044 | "short8 __ovld __cnfn convert_short8_sat(ushort8);\n" |
| 30045 | "short8 __ovld __cnfn convert_short8_rte(int8);\n" |
| 30046 | "short8 __ovld __cnfn convert_short8_sat_rte(int8);\n" |
| 30047 | "short8 __ovld __cnfn convert_short8_rtz(int8);\n" |
| 30048 | "short8 __ovld __cnfn convert_short8_sat_rtz(int8);\n" |
| 30049 | "short8 __ovld __cnfn convert_short8_rtp(int8);\n" |
| 30050 | "short8 __ovld __cnfn convert_short8_sat_rtp(int8);\n" |
| 30051 | "short8 __ovld __cnfn convert_short8_rtn(int8);\n" |
| 30052 | "short8 __ovld __cnfn convert_short8_sat_rtn(int8);\n" |
| 30053 | "short8 __ovld __cnfn convert_short8(int8);\n" |
| 30054 | "short8 __ovld __cnfn convert_short8_sat(int8);\n" |
| 30055 | "short8 __ovld __cnfn convert_short8_rte(uint8);\n" |
| 30056 | "short8 __ovld __cnfn convert_short8_sat_rte(uint8);\n" |
| 30057 | "short8 __ovld __cnfn convert_short8_rtz(uint8);\n" |
| 30058 | "short8 __ovld __cnfn convert_short8_sat_rtz(uint8);\n" |
| 30059 | "short8 __ovld __cnfn convert_short8_rtp(uint8);\n" |
| 30060 | "short8 __ovld __cnfn convert_short8_sat_rtp(uint8);\n" |
| 30061 | "short8 __ovld __cnfn convert_short8_rtn(uint8);\n" |
| 30062 | "short8 __ovld __cnfn convert_short8_sat_rtn(uint8);\n" |
| 30063 | "short8 __ovld __cnfn convert_short8(uint8);\n" |
| 30064 | "short8 __ovld __cnfn convert_short8_sat(uint8);\n" |
| 30065 | "short8 __ovld __cnfn convert_short8_rte(long8);\n" |
| 30066 | "short8 __ovld __cnfn convert_short8_sat_rte(long8);\n" |
| 30067 | "short8 __ovld __cnfn convert_short8_rtz(long8);\n" |
| 30068 | "short8 __ovld __cnfn convert_short8_sat_rtz(long8);\n" |
| 30069 | "short8 __ovld __cnfn convert_short8_rtp(long8);\n" |
| 30070 | "short8 __ovld __cnfn convert_short8_sat_rtp(long8);\n" |
| 30071 | "short8 __ovld __cnfn convert_short8_rtn(long8);\n" |
| 30072 | "short8 __ovld __cnfn convert_short8_sat_rtn(long8);\n" |
| 30073 | "short8 __ovld __cnfn convert_short8(long8);\n" |
| 30074 | "short8 __ovld __cnfn convert_short8_sat(long8);\n" |
| 30075 | "short8 __ovld __cnfn convert_short8_rte(ulong8);\n" |
| 30076 | "short8 __ovld __cnfn convert_short8_sat_rte(ulong8);\n" |
| 30077 | "short8 __ovld __cnfn convert_short8_rtz(ulong8);\n" |
| 30078 | "short8 __ovld __cnfn convert_short8_sat_rtz(ulong8);\n" |
| 30079 | "short8 __ovld __cnfn convert_short8_rtp(ulong8);\n" |
| 30080 | "short8 __ovld __cnfn convert_short8_sat_rtp(ulong8);\n" |
| 30081 | "short8 __ovld __cnfn convert_short8_rtn(ulong8);\n" |
| 30082 | "short8 __ovld __cnfn convert_short8_sat_rtn(ulong8);\n" |
| 30083 | "short8 __ovld __cnfn convert_short8(ulong8);\n" |
| 30084 | "short8 __ovld __cnfn convert_short8_sat(ulong8);\n" |
| 30085 | "short8 __ovld __cnfn convert_short8_rte(float8);\n" |
| 30086 | "short8 __ovld __cnfn convert_short8_sat_rte(float8);\n" |
| 30087 | "short8 __ovld __cnfn convert_short8_rtz(float8);\n" |
| 30088 | "short8 __ovld __cnfn convert_short8_sat_rtz(float8);\n" |
| 30089 | "short8 __ovld __cnfn convert_short8_rtp(float8);\n" |
| 30090 | "short8 __ovld __cnfn convert_short8_sat_rtp(float8);\n" |
| 30091 | "short8 __ovld __cnfn convert_short8_rtn(float8);\n" |
| 30092 | "short8 __ovld __cnfn convert_short8_sat_rtn(float8);\n" |
| 30093 | "short8 __ovld __cnfn convert_short8(float8);\n" |
| 30094 | "short8 __ovld __cnfn convert_short8_sat(float8);\n" |
| 30095 | "ushort8 __ovld __cnfn convert_ushort8_rte(char8);\n" |
| 30096 | "ushort8 __ovld __cnfn convert_ushort8_sat_rte(char8);\n" |
| 30097 | "ushort8 __ovld __cnfn convert_ushort8_rtz(char8);\n" |
| 30098 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtz(char8);\n" |
| 30099 | "ushort8 __ovld __cnfn convert_ushort8_rtp(char8);\n" |
| 30100 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtp(char8);\n" |
| 30101 | "ushort8 __ovld __cnfn convert_ushort8_rtn(char8);\n" |
| 30102 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtn(char8);\n" |
| 30103 | "ushort8 __ovld __cnfn convert_ushort8(char8);\n" |
| 30104 | "ushort8 __ovld __cnfn convert_ushort8_sat(char8);\n" |
| 30105 | "ushort8 __ovld __cnfn convert_ushort8_rte(uchar8);\n" |
| 30106 | "ushort8 __ovld __cnfn convert_ushort8_sat_rte(uchar8);\n" |
| 30107 | "ushort8 __ovld __cnfn convert_ushort8_rtz(uchar8);\n" |
| 30108 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtz(uchar8);\n" |
| 30109 | "ushort8 __ovld __cnfn convert_ushort8_rtp(uchar8);\n" |
| 30110 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtp(uchar8);\n" |
| 30111 | "ushort8 __ovld __cnfn convert_ushort8_rtn(uchar8);\n" |
| 30112 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtn(uchar8);\n" |
| 30113 | "ushort8 __ovld __cnfn convert_ushort8(uchar8);\n" |
| 30114 | "ushort8 __ovld __cnfn convert_ushort8_sat(uchar8);\n" |
| 30115 | "ushort8 __ovld __cnfn convert_ushort8_rte(short8);\n" |
| 30116 | "ushort8 __ovld __cnfn convert_ushort8_sat_rte(short8);\n" |
| 30117 | "ushort8 __ovld __cnfn convert_ushort8_rtz(short8);\n" |
| 30118 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtz(short8);\n" |
| 30119 | "ushort8 __ovld __cnfn convert_ushort8_rtp(short8);\n" |
| 30120 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtp(short8);\n" |
| 30121 | "ushort8 __ovld __cnfn convert_ushort8_rtn(short8);\n" |
| 30122 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtn(short8);\n" |
| 30123 | "ushort8 __ovld __cnfn convert_ushort8(short8);\n" |
| 30124 | "ushort8 __ovld __cnfn convert_ushort8_sat(short8);\n" |
| 30125 | "ushort8 __ovld __cnfn convert_ushort8_rte(ushort8);\n" |
| 30126 | "ushort8 __ovld __cnfn convert_ushort8_sat_rte(ushort8);\n" |
| 30127 | "ushort8 __ovld __cnfn convert_ushort8_rtz(ushort8);\n" |
| 30128 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtz(ushort8);\n" |
| 30129 | "ushort8 __ovld __cnfn convert_ushort8_rtp(ushort8);\n" |
| 30130 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtp(ushort8);\n" |
| 30131 | "ushort8 __ovld __cnfn convert_ushort8_rtn(ushort8);\n" |
| 30132 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtn(ushort8);\n" |
| 30133 | "ushort8 __ovld __cnfn convert_ushort8(ushort8);\n" |
| 30134 | "ushort8 __ovld __cnfn convert_ushort8_sat(ushort8);\n" |
| 30135 | "ushort8 __ovld __cnfn convert_ushort8_rte(int8);\n" |
| 30136 | "ushort8 __ovld __cnfn convert_ushort8_sat_rte(int8);\n" |
| 30137 | "ushort8 __ovld __cnfn convert_ushort8_rtz(int8);\n" |
| 30138 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtz(int8);\n" |
| 30139 | "ushort8 __ovld __cnfn convert_ushort8_rtp(int8);\n" |
| 30140 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtp(int8);\n" |
| 30141 | "ushort8 __ovld __cnfn convert_ushort8_rtn(int8);\n" |
| 30142 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtn(int8);\n" |
| 30143 | "ushort8 __ovld __cnfn convert_ushort8(int8);\n" |
| 30144 | "ushort8 __ovld __cnfn convert_ushort8_sat(int8);\n" |
| 30145 | "ushort8 __ovld __cnfn convert_ushort8_rte(uint8);\n" |
| 30146 | "ushort8 __ovld __cnfn convert_ushort8_sat_rte(uint8);\n" |
| 30147 | "ushort8 __ovld __cnfn convert_ushort8_rtz(uint8);\n" |
| 30148 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtz(uint8);\n" |
| 30149 | "ushort8 __ovld __cnfn convert_ushort8_rtp(uint8);\n" |
| 30150 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtp(uint8);\n" |
| 30151 | "ushort8 __ovld __cnfn convert_ushort8_rtn(uint8);\n" |
| 30152 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtn(uint8);\n" |
| 30153 | "ushort8 __ovld __cnfn convert_ushort8(uint8);\n" |
| 30154 | "ushort8 __ovld __cnfn convert_ushort8_sat(uint8);\n" |
| 30155 | "ushort8 __ovld __cnfn convert_ushort8_rte(long8);\n" |
| 30156 | "ushort8 __ovld __cnfn convert_ushort8_sat_rte(long8);\n" |
| 30157 | "ushort8 __ovld __cnfn convert_ushort8_rtz(long8);\n" |
| 30158 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtz(long8);\n" |
| 30159 | "ushort8 __ovld __cnfn convert_ushort8_rtp(long8);\n" |
| 30160 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtp(long8);\n" |
| 30161 | "ushort8 __ovld __cnfn convert_ushort8_rtn(long8);\n" |
| 30162 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtn(long8);\n" |
| 30163 | "ushort8 __ovld __cnfn convert_ushort8(long8);\n" |
| 30164 | "ushort8 __ovld __cnfn convert_ushort8_sat(long8);\n" |
| 30165 | "ushort8 __ovld __cnfn convert_ushort8_rte(ulong8);\n" |
| 30166 | "ushort8 __ovld __cnfn convert_ushort8_sat_rte(ulong8);\n" |
| 30167 | "ushort8 __ovld __cnfn convert_ushort8_rtz(ulong8);\n" |
| 30168 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtz(ulong8);\n" |
| 30169 | "ushort8 __ovld __cnfn convert_ushort8_rtp(ulong8);\n" |
| 30170 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtp(ulong8);\n" |
| 30171 | "ushort8 __ovld __cnfn convert_ushort8_rtn(ulong8);\n" |
| 30172 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtn(ulong8);\n" |
| 30173 | "ushort8 __ovld __cnfn convert_ushort8(ulong8);\n" |
| 30174 | "ushort8 __ovld __cnfn convert_ushort8_sat(ulong8);\n" |
| 30175 | "ushort8 __ovld __cnfn convert_ushort8_rte(float8);\n" |
| 30176 | "ushort8 __ovld __cnfn convert_ushort8_sat_rte(float8);\n" |
| 30177 | "ushort8 __ovld __cnfn convert_ushort8_rtz(float8);\n" |
| 30178 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtz(float8);\n" |
| 30179 | "ushort8 __ovld __cnfn convert_ushort8_rtp(float8);\n" |
| 30180 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtp(float8);\n" |
| 30181 | "ushort8 __ovld __cnfn convert_ushort8_rtn(float8);\n" |
| 30182 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtn(float8);\n" |
| 30183 | "ushort8 __ovld __cnfn convert_ushort8(float8);\n" |
| 30184 | "ushort8 __ovld __cnfn convert_ushort8_sat(float8);\n" |
| 30185 | "int8 __ovld __cnfn convert_int8_rte(char8);\n" |
| 30186 | "int8 __ovld __cnfn convert_int8_sat_rte(char8);\n" |
| 30187 | "int8 __ovld __cnfn convert_int8_rtz(char8);\n" |
| 30188 | "int8 __ovld __cnfn convert_int8_sat_rtz(char8);\n" |
| 30189 | "int8 __ovld __cnfn convert_int8_rtp(char8);\n" |
| 30190 | "int8 __ovld __cnfn convert_int8_sat_rtp(char8);\n" |
| 30191 | "int8 __ovld __cnfn convert_int8_rtn(char8);\n" |
| 30192 | "int8 __ovld __cnfn convert_int8_sat_rtn(char8);\n" |
| 30193 | "int8 __ovld __cnfn convert_int8(char8);\n" |
| 30194 | "int8 __ovld __cnfn convert_int8_sat(char8);\n" |
| 30195 | "int8 __ovld __cnfn convert_int8_rte(uchar8);\n" |
| 30196 | "int8 __ovld __cnfn convert_int8_sat_rte(uchar8);\n" |
| 30197 | "int8 __ovld __cnfn convert_int8_rtz(uchar8);\n" |
| 30198 | "int8 __ovld __cnfn convert_int8_sat_rtz(uchar8);\n" |
| 30199 | "int8 __ovld __cnfn convert_int8_rtp(uchar8);\n" |
| 30200 | "int8 __ovld __cnfn convert_int8_sat_rtp(uchar8);\n" |
| 30201 | "int8 __ovld __cnfn convert_int8_rtn(uchar8);\n" |
| 30202 | "int8 __ovld __cnfn convert_int8_sat_rtn(uchar8);\n" |
| 30203 | "int8 __ovld __cnfn convert_int8(uchar8);\n" |
| 30204 | "int8 __ovld __cnfn convert_int8_sat(uchar8);\n" |
| 30205 | "int8 __ovld __cnfn convert_int8_rte(short8);\n" |
| 30206 | "int8 __ovld __cnfn convert_int8_sat_rte(short8);\n" |
| 30207 | "int8 __ovld __cnfn convert_int8_rtz(short8);\n" |
| 30208 | "int8 __ovld __cnfn convert_int8_sat_rtz(short8);\n" |
| 30209 | "int8 __ovld __cnfn convert_int8_rtp(short8);\n" |
| 30210 | "int8 __ovld __cnfn convert_int8_sat_rtp(short8);\n" |
| 30211 | "int8 __ovld __cnfn convert_int8_rtn(short8);\n" |
| 30212 | "int8 __ovld __cnfn convert_int8_sat_rtn(short8);\n" |
| 30213 | "int8 __ovld __cnfn convert_int8(short8);\n" |
| 30214 | "int8 __ovld __cnfn convert_int8_sat(short8);\n" |
| 30215 | "int8 __ovld __cnfn convert_int8_rte(ushort8);\n" |
| 30216 | "int8 __ovld __cnfn convert_int8_sat_rte(ushort8);\n" |
| 30217 | "int8 __ovld __cnfn convert_int8_rtz(ushort8);\n" |
| 30218 | "int8 __ovld __cnfn convert_int8_sat_rtz(ushort8);\n" |
| 30219 | "int8 __ovld __cnfn convert_int8_rtp(ushort8);\n" |
| 30220 | "int8 __ovld __cnfn convert_int8_sat_rtp(ushort8);\n" |
| 30221 | "int8 __ovld __cnfn convert_int8_rtn(ushort8);\n" |
| 30222 | "int8 __ovld __cnfn convert_int8_sat_rtn(ushort8);\n" |
| 30223 | "int8 __ovld __cnfn convert_int8(ushort8);\n" |
| 30224 | "int8 __ovld __cnfn convert_int8_sat(ushort8);\n" |
| 30225 | "int8 __ovld __cnfn convert_int8_rte(int8);\n" |
| 30226 | "int8 __ovld __cnfn convert_int8_sat_rte(int8);\n" |
| 30227 | "int8 __ovld __cnfn convert_int8_rtz(int8);\n" |
| 30228 | "int8 __ovld __cnfn convert_int8_sat_rtz(int8);\n" |
| 30229 | "int8 __ovld __cnfn convert_int8_rtp(int8);\n" |
| 30230 | "int8 __ovld __cnfn convert_int8_sat_rtp(int8);\n" |
| 30231 | "int8 __ovld __cnfn convert_int8_rtn(int8);\n" |
| 30232 | "int8 __ovld __cnfn convert_int8_sat_rtn(int8);\n" |
| 30233 | "int8 __ovld __cnfn convert_int8(int8);\n" |
| 30234 | "int8 __ovld __cnfn convert_int8_sat(int8);\n" |
| 30235 | "int8 __ovld __cnfn convert_int8_rte(uint8);\n" |
| 30236 | "int8 __ovld __cnfn convert_int8_sat_rte(uint8);\n" |
| 30237 | "int8 __ovld __cnfn convert_int8_rtz(uint8);\n" |
| 30238 | "int8 __ovld __cnfn convert_int8_sat_rtz(uint8);\n" |
| 30239 | "int8 __ovld __cnfn convert_int8_rtp(uint8);\n" |
| 30240 | "int8 __ovld __cnfn convert_int8_sat_rtp(uint8);\n" |
| 30241 | "int8 __ovld __cnfn convert_int8_rtn(uint8);\n" |
| 30242 | "int8 __ovld __cnfn convert_int8_sat_rtn(uint8);\n" |
| 30243 | "int8 __ovld __cnfn convert_int8(uint8);\n" |
| 30244 | "int8 __ovld __cnfn convert_int8_sat(uint8);\n" |
| 30245 | "int8 __ovld __cnfn convert_int8_rte(long8);\n" |
| 30246 | "int8 __ovld __cnfn convert_int8_sat_rte(long8);\n" |
| 30247 | "int8 __ovld __cnfn convert_int8_rtz(long8);\n" |
| 30248 | "int8 __ovld __cnfn convert_int8_sat_rtz(long8);\n" |
| 30249 | "int8 __ovld __cnfn convert_int8_rtp(long8);\n" |
| 30250 | "int8 __ovld __cnfn convert_int8_sat_rtp(long8);\n" |
| 30251 | "int8 __ovld __cnfn convert_int8_rtn(long8);\n" |
| 30252 | "int8 __ovld __cnfn convert_int8_sat_rtn(long8);\n" |
| 30253 | "int8 __ovld __cnfn convert_int8(long8);\n" |
| 30254 | "int8 __ovld __cnfn convert_int8_sat(long8);\n" |
| 30255 | "int8 __ovld __cnfn convert_int8_rte(ulong8);\n" |
| 30256 | "int8 __ovld __cnfn convert_int8_sat_rte(ulong8);\n" |
| 30257 | "int8 __ovld __cnfn convert_int8_rtz(ulong8);\n" |
| 30258 | "int8 __ovld __cnfn convert_int8_sat_rtz(ulong8);\n" |
| 30259 | "int8 __ovld __cnfn convert_int8_rtp(ulong8);\n" |
| 30260 | "int8 __ovld __cnfn convert_int8_sat_rtp(ulong8);\n" |
| 30261 | "int8 __ovld __cnfn convert_int8_rtn(ulong8);\n" |
| 30262 | "int8 __ovld __cnfn convert_int8_sat_rtn(ulong8);\n" |
| 30263 | "int8 __ovld __cnfn convert_int8(ulong8);\n" |
| 30264 | "int8 __ovld __cnfn convert_int8_sat(ulong8);\n" |
| 30265 | "int8 __ovld __cnfn convert_int8_rte(float8);\n" |
| 30266 | "int8 __ovld __cnfn convert_int8_sat_rte(float8);\n" |
| 30267 | "int8 __ovld __cnfn convert_int8_rtz(float8);\n" |
| 30268 | "int8 __ovld __cnfn convert_int8_sat_rtz(float8);\n" |
| 30269 | "int8 __ovld __cnfn convert_int8_rtp(float8);\n" |
| 30270 | "int8 __ovld __cnfn convert_int8_sat_rtp(float8);\n" |
| 30271 | "int8 __ovld __cnfn convert_int8_rtn(float8);\n" |
| 30272 | "int8 __ovld __cnfn convert_int8_sat_rtn(float8);\n" |
| 30273 | "int8 __ovld __cnfn convert_int8(float8);\n" |
| 30274 | "int8 __ovld __cnfn convert_int8_sat(float8);\n" |
| 30275 | "uint8 __ovld __cnfn convert_uint8_rte(char8);\n" |
| 30276 | "uint8 __ovld __cnfn convert_uint8_sat_rte(char8);\n" |
| 30277 | "uint8 __ovld __cnfn convert_uint8_rtz(char8);\n" |
| 30278 | "uint8 __ovld __cnfn convert_uint8_sat_rtz(char8);\n" |
| 30279 | "uint8 __ovld __cnfn convert_uint8_rtp(char8);\n" |
| 30280 | "uint8 __ovld __cnfn convert_uint8_sat_rtp(char8);\n" |
| 30281 | "uint8 __ovld __cnfn convert_uint8_rtn(char8);\n" |
| 30282 | "uint8 __ovld __cnfn convert_uint8_sat_rtn(char8);\n" |
| 30283 | "uint8 __ovld __cnfn convert_uint8(char8);\n" |
| 30284 | "uint8 __ovld __cnfn convert_uint8_sat(char8);\n" |
| 30285 | "uint8 __ovld __cnfn convert_uint8_rte(uchar8);\n" |
| 30286 | "uint8 __ovld __cnfn convert_uint8_sat_rte(uchar8);\n" |
| 30287 | "uint8 __ovld __cnfn convert_uint8_rtz(uchar8);\n" |
| 30288 | "uint8 __ovld __cnfn convert_uint8_sat_rtz(uchar8);\n" |
| 30289 | "uint8 __ovld __cnfn convert_uint8_rtp(uchar8);\n" |
| 30290 | "uint8 __ovld __cnfn convert_uint8_sat_rtp(uchar8);\n" |
| 30291 | "uint8 __ovld __cnfn convert_uint8_rtn(uchar8);\n" |
| 30292 | "uint8 __ovld __cnfn convert_uint8_sat_rtn(uchar8);\n" |
| 30293 | "uint8 __ovld __cnfn convert_uint8(uchar8);\n" |
| 30294 | "uint8 __ovld __cnfn convert_uint8_sat(uchar8);\n" |
| 30295 | "uint8 __ovld __cnfn convert_uint8_rte(short8);\n" |
| 30296 | "uint8 __ovld __cnfn convert_uint8_sat_rte(short8);\n" |
| 30297 | "uint8 __ovld __cnfn convert_uint8_rtz(short8);\n" |
| 30298 | "uint8 __ovld __cnfn convert_uint8_sat_rtz(short8);\n" |
| 30299 | "uint8 __ovld __cnfn convert_uint8_rtp(short8);\n" |
| 30300 | "uint8 __ovld __cnfn convert_uint8_sat_rtp(short8);\n" |
| 30301 | "uint8 __ovld __cnfn convert_uint8_rtn(short8);\n" |
| 30302 | "uint8 __ovld __cnfn convert_uint8_sat_rtn(short8);\n" |
| 30303 | "uint8 __ovld __cnfn convert_uint8(short8);\n" |
| 30304 | "uint8 __ovld __cnfn convert_uint8_sat(short8);\n" |
| 30305 | "uint8 __ovld __cnfn convert_uint8_rte(ushort8);\n" |
| 30306 | "uint8 __ovld __cnfn convert_uint8_sat_rte(ushort8);\n" |
| 30307 | "uint8 __ovld __cnfn convert_uint8_rtz(ushort8);\n" |
| 30308 | "uint8 __ovld __cnfn convert_uint8_sat_rtz(ushort8);\n" |
| 30309 | "uint8 __ovld __cnfn convert_uint8_rtp(ushort8);\n" |
| 30310 | "uint8 __ovld __cnfn convert_uint8_sat_rtp(ushort8);\n" |
| 30311 | "uint8 __ovld __cnfn convert_uint8_rtn(ushort8);\n" |
| 30312 | "uint8 __ovld __cnfn convert_uint8_sat_rtn(ushort8);\n" |
| 30313 | "uint8 __ovld __cnfn convert_uint8(ushort8);\n" |
| 30314 | "uint8 __ovld __cnfn convert_uint8_sat(ushort8);\n" |
| 30315 | "uint8 __ovld __cnfn convert_uint8_rte(int8);\n" |
| 30316 | "uint8 __ovld __cnfn convert_uint8_sat_rte(int8);\n" |
| 30317 | "uint8 __ovld __cnfn convert_uint8_rtz(int8);\n" |
| 30318 | "uint8 __ovld __cnfn convert_uint8_sat_rtz(int8);\n" |
| 30319 | "uint8 __ovld __cnfn convert_uint8_rtp(int8);\n" |
| 30320 | "uint8 __ovld __cnfn convert_uint8_sat_rtp(int8);\n" |
| 30321 | "uint8 __ovld __cnfn convert_uint8_rtn(int8);\n" |
| 30322 | "uint8 __ovld __cnfn convert_uint8_sat_rtn(int8);\n" |
| 30323 | "uint8 __ovld __cnfn convert_uint8(int8);\n" |
| 30324 | "uint8 __ovld __cnfn convert_uint8_sat(int8);\n" |
| 30325 | "uint8 __ovld __cnfn convert_uint8_rte(uint8);\n" |
| 30326 | "uint8 __ovld __cnfn convert_uint8_sat_rte(uint8);\n" |
| 30327 | "uint8 __ovld __cnfn convert_uint8_rtz(uint8);\n" |
| 30328 | "uint8 __ovld __cnfn convert_uint8_sat_rtz(uint8);\n" |
| 30329 | "uint8 __ovld __cnfn convert_uint8_rtp(uint8);\n" |
| 30330 | "uint8 __ovld __cnfn convert_uint8_sat_rtp(uint8);\n" |
| 30331 | "uint8 __ovld __cnfn convert_uint8_rtn(uint8);\n" |
| 30332 | "uint8 __ovld __cnfn convert_uint8_sat_rtn(uint8);\n" |
| 30333 | "uint8 __ovld __cnfn convert_uint8(uint8);\n" |
| 30334 | "uint8 __ovld __cnfn convert_uint8_sat(uint8);\n" |
| 30335 | "uint8 __ovld __cnfn convert_uint8_rte(long8);\n" |
| 30336 | "uint8 __ovld __cnfn convert_uint8_sat_rte(long8);\n" |
| 30337 | "uint8 __ovld __cnfn convert_uint8_rtz(long8);\n" |
| 30338 | "uint8 __ovld __cnfn convert_uint8_sat_rtz(long8);\n" |
| 30339 | "uint8 __ovld __cnfn convert_uint8_rtp(long8);\n" |
| 30340 | "uint8 __ovld __cnfn convert_uint8_sat_rtp(long8);\n" |
| 30341 | "uint8 __ovld __cnfn convert_uint8_rtn(long8);\n" |
| 30342 | "uint8 __ovld __cnfn convert_uint8_sat_rtn(long8);\n" |
| 30343 | "uint8 __ovld __cnfn convert_uint8(long8);\n" |
| 30344 | "uint8 __ovld __cnfn convert_uint8_sat(long8);\n" |
| 30345 | "uint8 __ovld __cnfn convert_uint8_rte(ulong8);\n" |
| 30346 | "uint8 __ovld __cnfn convert_uint8_sat_rte(ulong8);\n" |
| 30347 | "uint8 __ovld __cnfn convert_uint8_rtz(ulong8);\n" |
| 30348 | "uint8 __ovld __cnfn convert_uint8_sat_rtz(ulong8);\n" |
| 30349 | "uint8 __ovld __cnfn convert_uint8_rtp(ulong8);\n" |
| 30350 | "uint8 __ovld __cnfn convert_uint8_sat_rtp(ulong8);\n" |
| 30351 | "uint8 __ovld __cnfn convert_uint8_rtn(ulong8);\n" |
| 30352 | "uint8 __ovld __cnfn convert_uint8_sat_rtn(ulong8);\n" |
| 30353 | "uint8 __ovld __cnfn convert_uint8(ulong8);\n" |
| 30354 | "uint8 __ovld __cnfn convert_uint8_sat(ulong8);\n" |
| 30355 | "uint8 __ovld __cnfn convert_uint8_rte(float8);\n" |
| 30356 | "uint8 __ovld __cnfn convert_uint8_sat_rte(float8);\n" |
| 30357 | "uint8 __ovld __cnfn convert_uint8_rtz(float8);\n" |
| 30358 | "uint8 __ovld __cnfn convert_uint8_sat_rtz(float8);\n" |
| 30359 | "uint8 __ovld __cnfn convert_uint8_rtp(float8);\n" |
| 30360 | "uint8 __ovld __cnfn convert_uint8_sat_rtp(float8);\n" |
| 30361 | "uint8 __ovld __cnfn convert_uint8_rtn(float8);\n" |
| 30362 | "uint8 __ovld __cnfn convert_uint8_sat_rtn(float8);\n" |
| 30363 | "uint8 __ovld __cnfn convert_uint8(float8);\n" |
| 30364 | "uint8 __ovld __cnfn convert_uint8_sat(float8);\n" |
| 30365 | "long8 __ovld __cnfn convert_long8_rte(char8);\n" |
| 30366 | "long8 __ovld __cnfn convert_long8_sat_rte(char8);\n" |
| 30367 | "long8 __ovld __cnfn convert_long8_rtz(char8);\n" |
| 30368 | "long8 __ovld __cnfn convert_long8_sat_rtz(char8);\n" |
| 30369 | "long8 __ovld __cnfn convert_long8_rtp(char8);\n" |
| 30370 | "long8 __ovld __cnfn convert_long8_sat_rtp(char8);\n" |
| 30371 | "long8 __ovld __cnfn convert_long8_rtn(char8);\n" |
| 30372 | "long8 __ovld __cnfn convert_long8_sat_rtn(char8);\n" |
| 30373 | "long8 __ovld __cnfn convert_long8(char8);\n" |
| 30374 | "long8 __ovld __cnfn convert_long8_sat(char8);\n" |
| 30375 | "long8 __ovld __cnfn convert_long8_rte(uchar8);\n" |
| 30376 | "long8 __ovld __cnfn convert_long8_sat_rte(uchar8);\n" |
| 30377 | "long8 __ovld __cnfn convert_long8_rtz(uchar8);\n" |
| 30378 | "long8 __ovld __cnfn convert_long8_sat_rtz(uchar8);\n" |
| 30379 | "long8 __ovld __cnfn convert_long8_rtp(uchar8);\n" |
| 30380 | "long8 __ovld __cnfn convert_long8_sat_rtp(uchar8);\n" |
| 30381 | "long8 __ovld __cnfn convert_long8_rtn(uchar8);\n" |
| 30382 | "long8 __ovld __cnfn convert_long8_sat_rtn(uchar8);\n" |
| 30383 | "long8 __ovld __cnfn convert_long8(uchar8);\n" |
| 30384 | "long8 __ovld __cnfn convert_long8_sat(uchar8);\n" |
| 30385 | "long8 __ovld __cnfn convert_long8_rte(short8);\n" |
| 30386 | "long8 __ovld __cnfn convert_long8_sat_rte(short8);\n" |
| 30387 | "long8 __ovld __cnfn convert_long8_rtz(short8);\n" |
| 30388 | "long8 __ovld __cnfn convert_long8_sat_rtz(short8);\n" |
| 30389 | "long8 __ovld __cnfn convert_long8_rtp(short8);\n" |
| 30390 | "long8 __ovld __cnfn convert_long8_sat_rtp(short8);\n" |
| 30391 | "long8 __ovld __cnfn convert_long8_rtn(short8);\n" |
| 30392 | "long8 __ovld __cnfn convert_long8_sat_rtn(short8);\n" |
| 30393 | "long8 __ovld __cnfn convert_long8(short8);\n" |
| 30394 | "long8 __ovld __cnfn convert_long8_sat(short8);\n" |
| 30395 | "long8 __ovld __cnfn convert_long8_rte(ushort8);\n" |
| 30396 | "long8 __ovld __cnfn convert_long8_sat_rte(ushort8);\n" |
| 30397 | "long8 __ovld __cnfn convert_long8_rtz(ushort8);\n" |
| 30398 | "long8 __ovld __cnfn convert_long8_sat_rtz(ushort8);\n" |
| 30399 | "long8 __ovld __cnfn convert_long8_rtp(ushort8);\n" |
| 30400 | "long8 __ovld __cnfn convert_long8_sat_rtp(ushort8);\n" |
| 30401 | "long8 __ovld __cnfn convert_long8_rtn(ushort8);\n" |
| 30402 | "long8 __ovld __cnfn convert_long8_sat_rtn(ushort8);\n" |
| 30403 | "long8 __ovld __cnfn convert_long8(ushort8);\n" |
| 30404 | "long8 __ovld __cnfn convert_long8_sat(ushort8);\n" |
| 30405 | "long8 __ovld __cnfn convert_long8_rte(int8);\n" |
| 30406 | "long8 __ovld __cnfn convert_long8_sat_rte(int8);\n" |
| 30407 | "long8 __ovld __cnfn convert_long8_rtz(int8);\n" |
| 30408 | "long8 __ovld __cnfn convert_long8_sat_rtz(int8);\n" |
| 30409 | "long8 __ovld __cnfn convert_long8_rtp(int8);\n" |
| 30410 | "long8 __ovld __cnfn convert_long8_sat_rtp(int8);\n" |
| 30411 | "long8 __ovld __cnfn convert_long8_rtn(int8);\n" |
| 30412 | "long8 __ovld __cnfn convert_long8_sat_rtn(int8);\n" |
| 30413 | "long8 __ovld __cnfn convert_long8(int8);\n" |
| 30414 | "long8 __ovld __cnfn convert_long8_sat(int8);\n" |
| 30415 | "long8 __ovld __cnfn convert_long8_rte(uint8);\n" |
| 30416 | "long8 __ovld __cnfn convert_long8_sat_rte(uint8);\n" |
| 30417 | "long8 __ovld __cnfn convert_long8_rtz(uint8);\n" |
| 30418 | "long8 __ovld __cnfn convert_long8_sat_rtz(uint8);\n" |
| 30419 | "long8 __ovld __cnfn convert_long8_rtp(uint8);\n" |
| 30420 | "long8 __ovld __cnfn convert_long8_sat_rtp(uint8);\n" |
| 30421 | "long8 __ovld __cnfn convert_long8_rtn(uint8);\n" |
| 30422 | "long8 __ovld __cnfn convert_long8_sat_rtn(uint8);\n" |
| 30423 | "long8 __ovld __cnfn convert_long8(uint8);\n" |
| 30424 | "long8 __ovld __cnfn convert_long8_sat(uint8);\n" |
| 30425 | "long8 __ovld __cnfn convert_long8_rte(long8);\n" |
| 30426 | "long8 __ovld __cnfn convert_long8_sat_rte(long8);\n" |
| 30427 | "long8 __ovld __cnfn convert_long8_rtz(long8);\n" |
| 30428 | "long8 __ovld __cnfn convert_long8_sat_rtz(long8);\n" |
| 30429 | "long8 __ovld __cnfn convert_long8_rtp(long8);\n" |
| 30430 | "long8 __ovld __cnfn convert_long8_sat_rtp(long8);\n" |
| 30431 | "long8 __ovld __cnfn convert_long8_rtn(long8);\n" |
| 30432 | "long8 __ovld __cnfn convert_long8_sat_rtn(long8);\n" |
| 30433 | "long8 __ovld __cnfn convert_long8(long8);\n" |
| 30434 | "long8 __ovld __cnfn convert_long8_sat(long8);\n" |
| 30435 | "long8 __ovld __cnfn convert_long8_rte(ulong8);\n" |
| 30436 | "long8 __ovld __cnfn convert_long8_sat_rte(ulong8);\n" |
| 30437 | "long8 __ovld __cnfn convert_long8_rtz(ulong8);\n" |
| 30438 | "long8 __ovld __cnfn convert_long8_sat_rtz(ulong8);\n" |
| 30439 | "long8 __ovld __cnfn convert_long8_rtp(ulong8);\n" |
| 30440 | "long8 __ovld __cnfn convert_long8_sat_rtp(ulong8);\n" |
| 30441 | "long8 __ovld __cnfn convert_long8_rtn(ulong8);\n" |
| 30442 | "long8 __ovld __cnfn convert_long8_sat_rtn(ulong8);\n" |
| 30443 | "long8 __ovld __cnfn convert_long8(ulong8);\n" |
| 30444 | "long8 __ovld __cnfn convert_long8_sat(ulong8);\n" |
| 30445 | "long8 __ovld __cnfn convert_long8_rte(float8);\n" |
| 30446 | "long8 __ovld __cnfn convert_long8_sat_rte(float8);\n" |
| 30447 | "long8 __ovld __cnfn convert_long8_rtz(float8);\n" |
| 30448 | "long8 __ovld __cnfn convert_long8_sat_rtz(float8);\n" |
| 30449 | "long8 __ovld __cnfn convert_long8_rtp(float8);\n" |
| 30450 | "long8 __ovld __cnfn convert_long8_sat_rtp(float8);\n" |
| 30451 | "long8 __ovld __cnfn convert_long8_rtn(float8);\n" |
| 30452 | "long8 __ovld __cnfn convert_long8_sat_rtn(float8);\n" |
| 30453 | "long8 __ovld __cnfn convert_long8(float8);\n" |
| 30454 | "long8 __ovld __cnfn convert_long8_sat(float8);\n" |
| 30455 | "ulong8 __ovld __cnfn convert_ulong8_rte(char8);\n" |
| 30456 | "ulong8 __ovld __cnfn convert_ulong8_sat_rte(char8);\n" |
| 30457 | "ulong8 __ovld __cnfn convert_ulong8_rtz(char8);\n" |
| 30458 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtz(char8);\n" |
| 30459 | "ulong8 __ovld __cnfn convert_ulong8_rtp(char8);\n" |
| 30460 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtp(char8);\n" |
| 30461 | "ulong8 __ovld __cnfn convert_ulong8_rtn(char8);\n" |
| 30462 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtn(char8);\n" |
| 30463 | "ulong8 __ovld __cnfn convert_ulong8(char8);\n" |
| 30464 | "ulong8 __ovld __cnfn convert_ulong8_sat(char8);\n" |
| 30465 | "ulong8 __ovld __cnfn convert_ulong8_rte(uchar8);\n" |
| 30466 | "ulong8 __ovld __cnfn convert_ulong8_sat_rte(uchar8);\n" |
| 30467 | "ulong8 __ovld __cnfn convert_ulong8_rtz(uchar8);\n" |
| 30468 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtz(uchar8);\n" |
| 30469 | "ulong8 __ovld __cnfn convert_ulong8_rtp(uchar8);\n" |
| 30470 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtp(uchar8);\n" |
| 30471 | "ulong8 __ovld __cnfn convert_ulong8_rtn(uchar8);\n" |
| 30472 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtn(uchar8);\n" |
| 30473 | "ulong8 __ovld __cnfn convert_ulong8(uchar8);\n" |
| 30474 | "ulong8 __ovld __cnfn convert_ulong8_sat(uchar8);\n" |
| 30475 | "ulong8 __ovld __cnfn convert_ulong8_rte(short8);\n" |
| 30476 | "ulong8 __ovld __cnfn convert_ulong8_sat_rte(short8);\n" |
| 30477 | "ulong8 __ovld __cnfn convert_ulong8_rtz(short8);\n" |
| 30478 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtz(short8);\n" |
| 30479 | "ulong8 __ovld __cnfn convert_ulong8_rtp(short8);\n" |
| 30480 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtp(short8);\n" |
| 30481 | "ulong8 __ovld __cnfn convert_ulong8_rtn(short8);\n" |
| 30482 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtn(short8);\n" |
| 30483 | "ulong8 __ovld __cnfn convert_ulong8(short8);\n" |
| 30484 | "ulong8 __ovld __cnfn convert_ulong8_sat(short8);\n" |
| 30485 | "ulong8 __ovld __cnfn convert_ulong8_rte(ushort8);\n" |
| 30486 | "ulong8 __ovld __cnfn convert_ulong8_sat_rte(ushort8);\n" |
| 30487 | "ulong8 __ovld __cnfn convert_ulong8_rtz(ushort8);\n" |
| 30488 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtz(ushort8);\n" |
| 30489 | "ulong8 __ovld __cnfn convert_ulong8_rtp(ushort8);\n" |
| 30490 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtp(ushort8);\n" |
| 30491 | "ulong8 __ovld __cnfn convert_ulong8_rtn(ushort8);\n" |
| 30492 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtn(ushort8);\n" |
| 30493 | "ulong8 __ovld __cnfn convert_ulong8(ushort8);\n" |
| 30494 | "ulong8 __ovld __cnfn convert_ulong8_sat(ushort8);\n" |
| 30495 | "ulong8 __ovld __cnfn convert_ulong8_rte(int8);\n" |
| 30496 | "ulong8 __ovld __cnfn convert_ulong8_sat_rte(int8);\n" |
| 30497 | "ulong8 __ovld __cnfn convert_ulong8_rtz(int8);\n" |
| 30498 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtz(int8);\n" |
| 30499 | "ulong8 __ovld __cnfn convert_ulong8_rtp(int8);\n" |
| 30500 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtp(int8);\n" |
| 30501 | "ulong8 __ovld __cnfn convert_ulong8_rtn(int8);\n" |
| 30502 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtn(int8);\n" |
| 30503 | "ulong8 __ovld __cnfn convert_ulong8(int8);\n" |
| 30504 | "ulong8 __ovld __cnfn convert_ulong8_sat(int8);\n" |
| 30505 | "ulong8 __ovld __cnfn convert_ulong8_rte(uint8);\n" |
| 30506 | "ulong8 __ovld __cnfn convert_ulong8_sat_rte(uint8);\n" |
| 30507 | "ulong8 __ovld __cnfn convert_ulong8_rtz(uint8);\n" |
| 30508 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtz(uint8);\n" |
| 30509 | "ulong8 __ovld __cnfn convert_ulong8_rtp(uint8);\n" |
| 30510 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtp(uint8);\n" |
| 30511 | "ulong8 __ovld __cnfn convert_ulong8_rtn(uint8);\n" |
| 30512 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtn(uint8);\n" |
| 30513 | "ulong8 __ovld __cnfn convert_ulong8(uint8);\n" |
| 30514 | "ulong8 __ovld __cnfn convert_ulong8_sat(uint8);\n" |
| 30515 | "ulong8 __ovld __cnfn convert_ulong8_rte(long8);\n" |
| 30516 | "ulong8 __ovld __cnfn convert_ulong8_sat_rte(long8);\n" |
| 30517 | "ulong8 __ovld __cnfn convert_ulong8_rtz(long8);\n" |
| 30518 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtz(long8);\n" |
| 30519 | "ulong8 __ovld __cnfn convert_ulong8_rtp(long8);\n" |
| 30520 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtp(long8);\n" |
| 30521 | "ulong8 __ovld __cnfn convert_ulong8_rtn(long8);\n" |
| 30522 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtn(long8);\n" |
| 30523 | "ulong8 __ovld __cnfn convert_ulong8(long8);\n" |
| 30524 | "ulong8 __ovld __cnfn convert_ulong8_sat(long8);\n" |
| 30525 | "ulong8 __ovld __cnfn convert_ulong8_rte(ulong8);\n" |
| 30526 | "ulong8 __ovld __cnfn convert_ulong8_sat_rte(ulong8);\n" |
| 30527 | "ulong8 __ovld __cnfn convert_ulong8_rtz(ulong8);\n" |
| 30528 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtz(ulong8);\n" |
| 30529 | "ulong8 __ovld __cnfn convert_ulong8_rtp(ulong8);\n" |
| 30530 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtp(ulong8);\n" |
| 30531 | "ulong8 __ovld __cnfn convert_ulong8_rtn(ulong8);\n" |
| 30532 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtn(ulong8);\n" |
| 30533 | "ulong8 __ovld __cnfn convert_ulong8(ulong8);\n" |
| 30534 | "ulong8 __ovld __cnfn convert_ulong8_sat(ulong8);\n" |
| 30535 | "ulong8 __ovld __cnfn convert_ulong8_rte(float8);\n" |
| 30536 | "ulong8 __ovld __cnfn convert_ulong8_sat_rte(float8);\n" |
| 30537 | "ulong8 __ovld __cnfn convert_ulong8_rtz(float8);\n" |
| 30538 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtz(float8);\n" |
| 30539 | "ulong8 __ovld __cnfn convert_ulong8_rtp(float8);\n" |
| 30540 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtp(float8);\n" |
| 30541 | "ulong8 __ovld __cnfn convert_ulong8_rtn(float8);\n" |
| 30542 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtn(float8);\n" |
| 30543 | "ulong8 __ovld __cnfn convert_ulong8(float8);\n" |
| 30544 | "ulong8 __ovld __cnfn convert_ulong8_sat(float8);\n" |
| 30545 | "float8 __ovld __cnfn convert_float8_rte(char8);\n" |
| 30546 | "float8 __ovld __cnfn convert_float8_rtz(char8);\n" |
| 30547 | "float8 __ovld __cnfn convert_float8_rtp(char8);\n" |
| 30548 | "float8 __ovld __cnfn convert_float8_rtn(char8);\n" |
| 30549 | "float8 __ovld __cnfn convert_float8(char8);\n" |
| 30550 | "float8 __ovld __cnfn convert_float8_rte(uchar8);\n" |
| 30551 | "float8 __ovld __cnfn convert_float8_rtz(uchar8);\n" |
| 30552 | "float8 __ovld __cnfn convert_float8_rtp(uchar8);\n" |
| 30553 | "float8 __ovld __cnfn convert_float8_rtn(uchar8);\n" |
| 30554 | "float8 __ovld __cnfn convert_float8(uchar8);\n" |
| 30555 | "float8 __ovld __cnfn convert_float8_rte(short8);\n" |
| 30556 | "float8 __ovld __cnfn convert_float8_rtz(short8);\n" |
| 30557 | "float8 __ovld __cnfn convert_float8_rtp(short8);\n" |
| 30558 | "float8 __ovld __cnfn convert_float8_rtn(short8);\n" |
| 30559 | "float8 __ovld __cnfn convert_float8(short8);\n" |
| 30560 | "float8 __ovld __cnfn convert_float8_rte(ushort8);\n" |
| 30561 | "float8 __ovld __cnfn convert_float8_rtz(ushort8);\n" |
| 30562 | "float8 __ovld __cnfn convert_float8_rtp(ushort8);\n" |
| 30563 | "float8 __ovld __cnfn convert_float8_rtn(ushort8);\n" |
| 30564 | "float8 __ovld __cnfn convert_float8(ushort8);\n" |
| 30565 | "float8 __ovld __cnfn convert_float8_rte(int8);\n" |
| 30566 | "float8 __ovld __cnfn convert_float8_rtz(int8);\n" |
| 30567 | "float8 __ovld __cnfn convert_float8_rtp(int8);\n" |
| 30568 | "float8 __ovld __cnfn convert_float8_rtn(int8);\n" |
| 30569 | "float8 __ovld __cnfn convert_float8(int8);\n" |
| 30570 | "float8 __ovld __cnfn convert_float8_rte(uint8);\n" |
| 30571 | "float8 __ovld __cnfn convert_float8_rtz(uint8);\n" |
| 30572 | "float8 __ovld __cnfn convert_float8_rtp(uint8);\n" |
| 30573 | "float8 __ovld __cnfn convert_float8_rtn(uint8);\n" |
| 30574 | "float8 __ovld __cnfn convert_float8(uint8);\n" |
| 30575 | "float8 __ovld __cnfn convert_float8_rte(long8);\n" |
| 30576 | "float8 __ovld __cnfn convert_float8_rtz(long8);\n" |
| 30577 | "float8 __ovld __cnfn convert_float8_rtp(long8);\n" |
| 30578 | "float8 __ovld __cnfn convert_float8_rtn(long8);\n" |
| 30579 | "float8 __ovld __cnfn convert_float8(long8);\n" |
| 30580 | "float8 __ovld __cnfn convert_float8_rte(ulong8);\n" |
| 30581 | "float8 __ovld __cnfn convert_float8_rtz(ulong8);\n" |
| 30582 | "float8 __ovld __cnfn convert_float8_rtp(ulong8);\n" |
| 30583 | "float8 __ovld __cnfn convert_float8_rtn(ulong8);\n" |
| 30584 | "float8 __ovld __cnfn convert_float8(ulong8);\n" |
| 30585 | "float8 __ovld __cnfn convert_float8_rte(float8);\n" |
| 30586 | "float8 __ovld __cnfn convert_float8_rtz(float8);\n" |
| 30587 | "float8 __ovld __cnfn convert_float8_rtp(float8);\n" |
| 30588 | "float8 __ovld __cnfn convert_float8_rtn(float8);\n" |
| 30589 | "float8 __ovld __cnfn convert_float8(float8);\n" |
| 30590 | "char16 __ovld __cnfn convert_char16_rte(char16);\n" |
| 30591 | "char16 __ovld __cnfn convert_char16_sat_rte(char16);\n" |
| 30592 | "char16 __ovld __cnfn convert_char16_rtz(char16);\n" |
| 30593 | "char16 __ovld __cnfn convert_char16_sat_rtz(char16);\n" |
| 30594 | "char16 __ovld __cnfn convert_char16_rtp(char16);\n" |
| 30595 | "char16 __ovld __cnfn convert_char16_sat_rtp(char16);\n" |
| 30596 | "char16 __ovld __cnfn convert_char16_rtn(char16);\n" |
| 30597 | "char16 __ovld __cnfn convert_char16_sat_rtn(char16);\n" |
| 30598 | "char16 __ovld __cnfn convert_char16(char16);\n" |
| 30599 | "char16 __ovld __cnfn convert_char16_sat(char16);\n" |
| 30600 | "char16 __ovld __cnfn convert_char16_rte(uchar16);\n" |
| 30601 | "char16 __ovld __cnfn convert_char16_sat_rte(uchar16);\n" |
| 30602 | "char16 __ovld __cnfn convert_char16_rtz(uchar16);\n" |
| 30603 | "char16 __ovld __cnfn convert_char16_sat_rtz(uchar16);\n" |
| 30604 | "char16 __ovld __cnfn convert_char16_rtp(uchar16);\n" |
| 30605 | "char16 __ovld __cnfn convert_char16_sat_rtp(uchar16);\n" |
| 30606 | "char16 __ovld __cnfn convert_char16_rtn(uchar16);\n" |
| 30607 | "char16 __ovld __cnfn convert_char16_sat_rtn(uchar16);\n" |
| 30608 | "char16 __ovld __cnfn convert_char16(uchar16);\n" |
| 30609 | "char16 __ovld __cnfn convert_char16_sat(uchar16);\n" |
| 30610 | "char16 __ovld __cnfn convert_char16_rte(short16);\n" |
| 30611 | "char16 __ovld __cnfn convert_char16_sat_rte(short16);\n" |
| 30612 | "char16 __ovld __cnfn convert_char16_rtz(short16);\n" |
| 30613 | "char16 __ovld __cnfn convert_char16_sat_rtz(short16);\n" |
| 30614 | "char16 __ovld __cnfn convert_char16_rtp(short16);\n" |
| 30615 | "char16 __ovld __cnfn convert_char16_sat_rtp(short16);\n" |
| 30616 | "char16 __ovld __cnfn convert_char16_rtn(short16);\n" |
| 30617 | "char16 __ovld __cnfn convert_char16_sat_rtn(short16);\n" |
| 30618 | "char16 __ovld __cnfn convert_char16(short16);\n" |
| 30619 | "char16 __ovld __cnfn convert_char16_sat(short16);\n" |
| 30620 | "char16 __ovld __cnfn convert_char16_rte(ushort16);\n" |
| 30621 | "char16 __ovld __cnfn convert_char16_sat_rte(ushort16);\n" |
| 30622 | "char16 __ovld __cnfn convert_char16_rtz(ushort16);\n" |
| 30623 | "char16 __ovld __cnfn convert_char16_sat_rtz(ushort16);\n" |
| 30624 | "char16 __ovld __cnfn convert_char16_rtp(ushort16);\n" |
| 30625 | "char16 __ovld __cnfn convert_char16_sat_rtp(ushort16);\n" |
| 30626 | "char16 __ovld __cnfn convert_char16_rtn(ushort16);\n" |
| 30627 | "char16 __ovld __cnfn convert_char16_sat_rtn(ushort16);\n" |
| 30628 | "char16 __ovld __cnfn convert_char16(ushort16);\n" |
| 30629 | "char16 __ovld __cnfn convert_char16_sat(ushort16);\n" |
| 30630 | "char16 __ovld __cnfn convert_char16_rte(int16);\n" |
| 30631 | "char16 __ovld __cnfn convert_char16_sat_rte(int16);\n" |
| 30632 | "char16 __ovld __cnfn convert_char16_rtz(int16);\n" |
| 30633 | "char16 __ovld __cnfn convert_char16_sat_rtz(int16);\n" |
| 30634 | "char16 __ovld __cnfn convert_char16_rtp(int16);\n" |
| 30635 | "char16 __ovld __cnfn convert_char16_sat_rtp(int16);\n" |
| 30636 | "char16 __ovld __cnfn convert_char16_rtn(int16);\n" |
| 30637 | "char16 __ovld __cnfn convert_char16_sat_rtn(int16);\n" |
| 30638 | "char16 __ovld __cnfn convert_char16(int16);\n" |
| 30639 | "char16 __ovld __cnfn convert_char16_sat(int16);\n" |
| 30640 | "char16 __ovld __cnfn convert_char16_rte(uint16);\n" |
| 30641 | "char16 __ovld __cnfn convert_char16_sat_rte(uint16);\n" |
| 30642 | "char16 __ovld __cnfn convert_char16_rtz(uint16);\n" |
| 30643 | "char16 __ovld __cnfn convert_char16_sat_rtz(uint16);\n" |
| 30644 | "char16 __ovld __cnfn convert_char16_rtp(uint16);\n" |
| 30645 | "char16 __ovld __cnfn convert_char16_sat_rtp(uint16);\n" |
| 30646 | "char16 __ovld __cnfn convert_char16_rtn(uint16);\n" |
| 30647 | "char16 __ovld __cnfn convert_char16_sat_rtn(uint16);\n" |
| 30648 | "char16 __ovld __cnfn convert_char16(uint16);\n" |
| 30649 | "char16 __ovld __cnfn convert_char16_sat(uint16);\n" |
| 30650 | "char16 __ovld __cnfn convert_char16_rte(long16);\n" |
| 30651 | "char16 __ovld __cnfn convert_char16_sat_rte(long16);\n" |
| 30652 | "char16 __ovld __cnfn convert_char16_rtz(long16);\n" |
| 30653 | "char16 __ovld __cnfn convert_char16_sat_rtz(long16);\n" |
| 30654 | "char16 __ovld __cnfn convert_char16_rtp(long16);\n" |
| 30655 | "char16 __ovld __cnfn convert_char16_sat_rtp(long16);\n" |
| 30656 | "char16 __ovld __cnfn convert_char16_rtn(long16);\n" |
| 30657 | "char16 __ovld __cnfn convert_char16_sat_rtn(long16);\n" |
| 30658 | "char16 __ovld __cnfn convert_char16(long16);\n" |
| 30659 | "char16 __ovld __cnfn convert_char16_sat(long16);\n" |
| 30660 | "char16 __ovld __cnfn convert_char16_rte(ulong16);\n" |
| 30661 | "char16 __ovld __cnfn convert_char16_sat_rte(ulong16);\n" |
| 30662 | "char16 __ovld __cnfn convert_char16_rtz(ulong16);\n" |
| 30663 | "char16 __ovld __cnfn convert_char16_sat_rtz(ulong16);\n" |
| 30664 | "char16 __ovld __cnfn convert_char16_rtp(ulong16);\n" |
| 30665 | "char16 __ovld __cnfn convert_char16_sat_rtp(ulong16);\n" |
| 30666 | "char16 __ovld __cnfn convert_char16_rtn(ulong16);\n" |
| 30667 | "char16 __ovld __cnfn convert_char16_sat_rtn(ulong16);\n" |
| 30668 | "char16 __ovld __cnfn convert_char16(ulong16);\n" |
| 30669 | "char16 __ovld __cnfn convert_char16_sat(ulong16);\n" |
| 30670 | "char16 __ovld __cnfn convert_char16_rte(float16);\n" |
| 30671 | "char16 __ovld __cnfn convert_char16_sat_rte(float16);\n" |
| 30672 | "char16 __ovld __cnfn convert_char16_rtz(float16);\n" |
| 30673 | "char16 __ovld __cnfn convert_char16_sat_rtz(float16);\n" |
| 30674 | "char16 __ovld __cnfn convert_char16_rtp(float16);\n" |
| 30675 | "char16 __ovld __cnfn convert_char16_sat_rtp(float16);\n" |
| 30676 | "char16 __ovld __cnfn convert_char16_rtn(float16);\n" |
| 30677 | "char16 __ovld __cnfn convert_char16_sat_rtn(float16);\n" |
| 30678 | "char16 __ovld __cnfn convert_char16(float16);\n" |
| 30679 | "char16 __ovld __cnfn convert_char16_sat(float16);\n" |
| 30680 | "uchar16 __ovld __cnfn convert_uchar16_rte(char16);\n" |
| 30681 | "uchar16 __ovld __cnfn convert_uchar16_sat_rte(char16);\n" |
| 30682 | "uchar16 __ovld __cnfn convert_uchar16_rtz(char16);\n" |
| 30683 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtz(char16);\n" |
| 30684 | "uchar16 __ovld __cnfn convert_uchar16_rtp(char16);\n" |
| 30685 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtp(char16);\n" |
| 30686 | "uchar16 __ovld __cnfn convert_uchar16_rtn(char16);\n" |
| 30687 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtn(char16);\n" |
| 30688 | "uchar16 __ovld __cnfn convert_uchar16(char16);\n" |
| 30689 | "uchar16 __ovld __cnfn convert_uchar16_sat(char16);\n" |
| 30690 | "uchar16 __ovld __cnfn convert_uchar16_rte(uchar16);\n" |
| 30691 | "uchar16 __ovld __cnfn convert_uchar16_sat_rte(uchar16);\n" |
| 30692 | "uchar16 __ovld __cnfn convert_uchar16_rtz(uchar16);\n" |
| 30693 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtz(uchar16);\n" |
| 30694 | "uchar16 __ovld __cnfn convert_uchar16_rtp(uchar16);\n" |
| 30695 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtp(uchar16);\n" |
| 30696 | "uchar16 __ovld __cnfn convert_uchar16_rtn(uchar16);\n" |
| 30697 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtn(uchar16);\n" |
| 30698 | "uchar16 __ovld __cnfn convert_uchar16(uchar16);\n" |
| 30699 | "uchar16 __ovld __cnfn convert_uchar16_sat(uchar16);\n" |
| 30700 | "uchar16 __ovld __cnfn convert_uchar16_rte(short16);\n" |
| 30701 | "uchar16 __ovld __cnfn convert_uchar16_sat_rte(short16);\n" |
| 30702 | "uchar16 __ovld __cnfn convert_uchar16_rtz(short16);\n" |
| 30703 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtz(short16);\n" |
| 30704 | "uchar16 __ovld __cnfn convert_uchar16_rtp(short16);\n" |
| 30705 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtp(short16);\n" |
| 30706 | "uchar16 __ovld __cnfn convert_uchar16_rtn(short16);\n" |
| 30707 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtn(short16);\n" |
| 30708 | "uchar16 __ovld __cnfn convert_uchar16(short16);\n" |
| 30709 | "uchar16 __ovld __cnfn convert_uchar16_sat(short16);\n" |
| 30710 | "uchar16 __ovld __cnfn convert_uchar16_rte(ushort16);\n" |
| 30711 | "uchar16 __ovld __cnfn convert_uchar16_sat_rte(ushort16);\n" |
| 30712 | "uchar16 __ovld __cnfn convert_uchar16_rtz(ushort16);\n" |
| 30713 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtz(ushort16);\n" |
| 30714 | "uchar16 __ovld __cnfn convert_uchar16_rtp(ushort16);\n" |
| 30715 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtp(ushort16);\n" |
| 30716 | "uchar16 __ovld __cnfn convert_uchar16_rtn(ushort16);\n" |
| 30717 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtn(ushort16);\n" |
| 30718 | "uchar16 __ovld __cnfn convert_uchar16(ushort16);\n" |
| 30719 | "uchar16 __ovld __cnfn convert_uchar16_sat(ushort16);\n" |
| 30720 | "uchar16 __ovld __cnfn convert_uchar16_rte(int16);\n" |
| 30721 | "uchar16 __ovld __cnfn convert_uchar16_sat_rte(int16);\n" |
| 30722 | "uchar16 __ovld __cnfn convert_uchar16_rtz(int16);\n" |
| 30723 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtz(int16);\n" |
| 30724 | "uchar16 __ovld __cnfn convert_uchar16_rtp(int16);\n" |
| 30725 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtp(int16);\n" |
| 30726 | "uchar16 __ovld __cnfn convert_uchar16_rtn(int16);\n" |
| 30727 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtn(int16);\n" |
| 30728 | "uchar16 __ovld __cnfn convert_uchar16(int16);\n" |
| 30729 | "uchar16 __ovld __cnfn convert_uchar16_sat(int16);\n" |
| 30730 | "uchar16 __ovld __cnfn convert_uchar16_rte(uint16);\n" |
| 30731 | "uchar16 __ovld __cnfn convert_uchar16_sat_rte(uint16);\n" |
| 30732 | "uchar16 __ovld __cnfn convert_uchar16_rtz(uint16);\n" |
| 30733 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtz(uint16);\n" |
| 30734 | "uchar16 __ovld __cnfn convert_uchar16_rtp(uint16);\n" |
| 30735 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtp(uint16);\n" |
| 30736 | "uchar16 __ovld __cnfn convert_uchar16_rtn(uint16);\n" |
| 30737 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtn(uint16);\n" |
| 30738 | "uchar16 __ovld __cnfn convert_uchar16(uint16);\n" |
| 30739 | "uchar16 __ovld __cnfn convert_uchar16_sat(uint16);\n" |
| 30740 | "uchar16 __ovld __cnfn convert_uchar16_rte(long16);\n" |
| 30741 | "uchar16 __ovld __cnfn convert_uchar16_sat_rte(long16);\n" |
| 30742 | "uchar16 __ovld __cnfn convert_uchar16_rtz(long16);\n" |
| 30743 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtz(long16);\n" |
| 30744 | "uchar16 __ovld __cnfn convert_uchar16_rtp(long16);\n" |
| 30745 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtp(long16);\n" |
| 30746 | "uchar16 __ovld __cnfn convert_uchar16_rtn(long16);\n" |
| 30747 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtn(long16);\n" |
| 30748 | "uchar16 __ovld __cnfn convert_uchar16(long16);\n" |
| 30749 | "uchar16 __ovld __cnfn convert_uchar16_sat(long16);\n" |
| 30750 | "uchar16 __ovld __cnfn convert_uchar16_rte(ulong16);\n" |
| 30751 | "uchar16 __ovld __cnfn convert_uchar16_sat_rte(ulong16);\n" |
| 30752 | "uchar16 __ovld __cnfn convert_uchar16_rtz(ulong16);\n" |
| 30753 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtz(ulong16);\n" |
| 30754 | "uchar16 __ovld __cnfn convert_uchar16_rtp(ulong16);\n" |
| 30755 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtp(ulong16);\n" |
| 30756 | "uchar16 __ovld __cnfn convert_uchar16_rtn(ulong16);\n" |
| 30757 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtn(ulong16);\n" |
| 30758 | "uchar16 __ovld __cnfn convert_uchar16(ulong16);\n" |
| 30759 | "uchar16 __ovld __cnfn convert_uchar16_sat(ulong16);\n" |
| 30760 | "uchar16 __ovld __cnfn convert_uchar16_rte(float16);\n" |
| 30761 | "uchar16 __ovld __cnfn convert_uchar16_sat_rte(float16);\n" |
| 30762 | "uchar16 __ovld __cnfn convert_uchar16_rtz(float16);\n" |
| 30763 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtz(float16);\n" |
| 30764 | "uchar16 __ovld __cnfn convert_uchar16_rtp(float16);\n" |
| 30765 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtp(float16);\n" |
| 30766 | "uchar16 __ovld __cnfn convert_uchar16_rtn(float16);\n" |
| 30767 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtn(float16);\n" |
| 30768 | "uchar16 __ovld __cnfn convert_uchar16(float16);\n" |
| 30769 | "uchar16 __ovld __cnfn convert_uchar16_sat(float16);\n" |
| 30770 | "short16 __ovld __cnfn convert_short16_rte(char16);\n" |
| 30771 | "short16 __ovld __cnfn convert_short16_sat_rte(char16);\n" |
| 30772 | "short16 __ovld __cnfn convert_short16_rtz(char16);\n" |
| 30773 | "short16 __ovld __cnfn convert_short16_sat_rtz(char16);\n" |
| 30774 | "short16 __ovld __cnfn convert_short16_rtp(char16);\n" |
| 30775 | "short16 __ovld __cnfn convert_short16_sat_rtp(char16);\n" |
| 30776 | "short16 __ovld __cnfn convert_short16_rtn(char16);\n" |
| 30777 | "short16 __ovld __cnfn convert_short16_sat_rtn(char16);\n" |
| 30778 | "short16 __ovld __cnfn convert_short16(char16);\n" |
| 30779 | "short16 __ovld __cnfn convert_short16_sat(char16);\n" |
| 30780 | "short16 __ovld __cnfn convert_short16_rte(uchar16);\n" |
| 30781 | "short16 __ovld __cnfn convert_short16_sat_rte(uchar16);\n" |
| 30782 | "short16 __ovld __cnfn convert_short16_rtz(uchar16);\n" |
| 30783 | "short16 __ovld __cnfn convert_short16_sat_rtz(uchar16);\n" |
| 30784 | "short16 __ovld __cnfn convert_short16_rtp(uchar16);\n" |
| 30785 | "short16 __ovld __cnfn convert_short16_sat_rtp(uchar16);\n" |
| 30786 | "short16 __ovld __cnfn convert_short16_rtn(uchar16);\n" |
| 30787 | "short16 __ovld __cnfn convert_short16_sat_rtn(uchar16);\n" |
| 30788 | "short16 __ovld __cnfn convert_short16(uchar16);\n" |
| 30789 | "short16 __ovld __cnfn convert_short16_sat(uchar16);\n" |
| 30790 | "short16 __ovld __cnfn convert_short16_rte(short16);\n" |
| 30791 | "short16 __ovld __cnfn convert_short16_sat_rte(short16);\n" |
| 30792 | "short16 __ovld __cnfn convert_short16_rtz(short16);\n" |
| 30793 | "short16 __ovld __cnfn convert_short16_sat_rtz(short16);\n" |
| 30794 | "short16 __ovld __cnfn convert_short16_rtp(short16);\n" |
| 30795 | "short16 __ovld __cnfn convert_short16_sat_rtp(short16);\n" |
| 30796 | "short16 __ovld __cnfn convert_short16_rtn(short16);\n" |
| 30797 | "short16 __ovld __cnfn convert_short16_sat_rtn(short16);\n" |
| 30798 | "short16 __ovld __cnfn convert_short16(short16);\n" |
| 30799 | "short16 __ovld __cnfn convert_short16_sat(short16);\n" |
| 30800 | "short16 __ovld __cnfn convert_short16_rte(ushort16);\n" |
| 30801 | "short16 __ovld __cnfn convert_short16_sat_rte(ushort16);\n" |
| 30802 | "short16 __ovld __cnfn convert_short16_rtz(ushort16);\n" |
| 30803 | "short16 __ovld __cnfn convert_short16_sat_rtz(ushort16);\n" |
| 30804 | "short16 __ovld __cnfn convert_short16_rtp(ushort16);\n" |
| 30805 | "short16 __ovld __cnfn convert_short16_sat_rtp(ushort16);\n" |
| 30806 | "short16 __ovld __cnfn convert_short16_rtn(ushort16);\n" |
| 30807 | "short16 __ovld __cnfn convert_short16_sat_rtn(ushort16);\n" |
| 30808 | "short16 __ovld __cnfn convert_short16(ushort16);\n" |
| 30809 | "short16 __ovld __cnfn convert_short16_sat(ushort16);\n" |
| 30810 | "short16 __ovld __cnfn convert_short16_rte(int16);\n" |
| 30811 | "short16 __ovld __cnfn convert_short16_sat_rte(int16);\n" |
| 30812 | "short16 __ovld __cnfn convert_short16_rtz(int16);\n" |
| 30813 | "short16 __ovld __cnfn convert_short16_sat_rtz(int16);\n" |
| 30814 | "short16 __ovld __cnfn convert_short16_rtp(int16);\n" |
| 30815 | "short16 __ovld __cnfn convert_short16_sat_rtp(int16);\n" |
| 30816 | "short16 __ovld __cnfn convert_short16_rtn(int16);\n" |
| 30817 | "short16 __ovld __cnfn convert_short16_sat_rtn(int16);\n" |
| 30818 | "short16 __ovld __cnfn convert_short16(int16);\n" |
| 30819 | "short16 __ovld __cnfn convert_short16_sat(int16);\n" |
| 30820 | "short16 __ovld __cnfn convert_short16_rte(uint16);\n" |
| 30821 | "short16 __ovld __cnfn convert_short16_sat_rte(uint16);\n" |
| 30822 | "short16 __ovld __cnfn convert_short16_rtz(uint16);\n" |
| 30823 | "short16 __ovld __cnfn convert_short16_sat_rtz(uint16);\n" |
| 30824 | "short16 __ovld __cnfn convert_short16_rtp(uint16);\n" |
| 30825 | "short16 __ovld __cnfn convert_short16_sat_rtp(uint16);\n" |
| 30826 | "short16 __ovld __cnfn convert_short16_rtn(uint16);\n" |
| 30827 | "short16 __ovld __cnfn convert_short16_sat_rtn(uint16);\n" |
| 30828 | "short16 __ovld __cnfn convert_short16(uint16);\n" |
| 30829 | "short16 __ovld __cnfn convert_short16_sat(uint16);\n" |
| 30830 | "short16 __ovld __cnfn convert_short16_rte(long16);\n" |
| 30831 | "short16 __ovld __cnfn convert_short16_sat_rte(long16);\n" |
| 30832 | "short16 __ovld __cnfn convert_short16_rtz(long16);\n" |
| 30833 | "short16 __ovld __cnfn convert_short16_sat_rtz(long16);\n" |
| 30834 | "short16 __ovld __cnfn convert_short16_rtp(long16);\n" |
| 30835 | "short16 __ovld __cnfn convert_short16_sat_rtp(long16);\n" |
| 30836 | "short16 __ovld __cnfn convert_short16_rtn(long16);\n" |
| 30837 | "short16 __ovld __cnfn convert_short16_sat_rtn(long16);\n" |
| 30838 | "short16 __ovld __cnfn convert_short16(long16);\n" |
| 30839 | "short16 __ovld __cnfn convert_short16_sat(long16);\n" |
| 30840 | "short16 __ovld __cnfn convert_short16_rte(ulong16);\n" |
| 30841 | "short16 __ovld __cnfn convert_short16_sat_rte(ulong16);\n" |
| 30842 | "short16 __ovld __cnfn convert_short16_rtz(ulong16);\n" |
| 30843 | "short16 __ovld __cnfn convert_short16_sat_rtz(ulong16);\n" |
| 30844 | "short16 __ovld __cnfn convert_short16_rtp(ulong16);\n" |
| 30845 | "short16 __ovld __cnfn convert_short16_sat_rtp(ulong16);\n" |
| 30846 | "short16 __ovld __cnfn convert_short16_rtn(ulong16);\n" |
| 30847 | "short16 __ovld __cnfn convert_short16_sat_rtn(ulong16);\n" |
| 30848 | "short16 __ovld __cnfn convert_short16(ulong16);\n" |
| 30849 | "short16 __ovld __cnfn convert_short16_sat(ulong16);\n" |
| 30850 | "short16 __ovld __cnfn convert_short16_rte(float16);\n" |
| 30851 | "short16 __ovld __cnfn convert_short16_sat_rte(float16);\n" |
| 30852 | "short16 __ovld __cnfn convert_short16_rtz(float16);\n" |
| 30853 | "short16 __ovld __cnfn convert_short16_sat_rtz(float16);\n" |
| 30854 | "short16 __ovld __cnfn convert_short16_rtp(float16);\n" |
| 30855 | "short16 __ovld __cnfn convert_short16_sat_rtp(float16);\n" |
| 30856 | "short16 __ovld __cnfn convert_short16_rtn(float16);\n" |
| 30857 | "short16 __ovld __cnfn convert_short16_sat_rtn(float16);\n" |
| 30858 | "short16 __ovld __cnfn convert_short16(float16);\n" |
| 30859 | "short16 __ovld __cnfn convert_short16_sat(float16);\n" |
| 30860 | "ushort16 __ovld __cnfn convert_ushort16_rte(char16);\n" |
| 30861 | "ushort16 __ovld __cnfn convert_ushort16_sat_rte(char16);\n" |
| 30862 | "ushort16 __ovld __cnfn convert_ushort16_rtz(char16);\n" |
| 30863 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtz(char16);\n" |
| 30864 | "ushort16 __ovld __cnfn convert_ushort16_rtp(char16);\n" |
| 30865 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtp(char16);\n" |
| 30866 | "ushort16 __ovld __cnfn convert_ushort16_rtn(char16);\n" |
| 30867 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtn(char16);\n" |
| 30868 | "ushort16 __ovld __cnfn convert_ushort16(char16);\n" |
| 30869 | "ushort16 __ovld __cnfn convert_ushort16_sat(char16);\n" |
| 30870 | "ushort16 __ovld __cnfn convert_ushort16_rte(uchar16);\n" |
| 30871 | "ushort16 __ovld __cnfn convert_ushort16_sat_rte(uchar16);\n" |
| 30872 | "ushort16 __ovld __cnfn convert_ushort16_rtz(uchar16);\n" |
| 30873 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtz(uchar16);\n" |
| 30874 | "ushort16 __ovld __cnfn convert_ushort16_rtp(uchar16);\n" |
| 30875 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtp(uchar16);\n" |
| 30876 | "ushort16 __ovld __cnfn convert_ushort16_rtn(uchar16);\n" |
| 30877 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtn(uchar16);\n" |
| 30878 | "ushort16 __ovld __cnfn convert_ushort16(uchar16);\n" |
| 30879 | "ushort16 __ovld __cnfn convert_ushort16_sat(uchar16);\n" |
| 30880 | "ushort16 __ovld __cnfn convert_ushort16_rte(short16);\n" |
| 30881 | "ushort16 __ovld __cnfn convert_ushort16_sat_rte(short16);\n" |
| 30882 | "ushort16 __ovld __cnfn convert_ushort16_rtz(short16);\n" |
| 30883 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtz(short16);\n" |
| 30884 | "ushort16 __ovld __cnfn convert_ushort16_rtp(short16);\n" |
| 30885 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtp(short16);\n" |
| 30886 | "ushort16 __ovld __cnfn convert_ushort16_rtn(short16);\n" |
| 30887 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtn(short16);\n" |
| 30888 | "ushort16 __ovld __cnfn convert_ushort16(short16);\n" |
| 30889 | "ushort16 __ovld __cnfn convert_ushort16_sat(short16);\n" |
| 30890 | "ushort16 __ovld __cnfn convert_ushort16_rte(ushort16);\n" |
| 30891 | "ushort16 __ovld __cnfn convert_ushort16_sat_rte(ushort16);\n" |
| 30892 | "ushort16 __ovld __cnfn convert_ushort16_rtz(ushort16);\n" |
| 30893 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtz(ushort16);\n" |
| 30894 | "ushort16 __ovld __cnfn convert_ushort16_rtp(ushort16);\n" |
| 30895 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtp(ushort16);\n" |
| 30896 | "ushort16 __ovld __cnfn convert_ushort16_rtn(ushort16);\n" |
| 30897 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtn(ushort16);\n" |
| 30898 | "ushort16 __ovld __cnfn convert_ushort16(ushort16);\n" |
| 30899 | "ushort16 __ovld __cnfn convert_ushort16_sat(ushort16);\n" |
| 30900 | "ushort16 __ovld __cnfn convert_ushort16_rte(int16);\n" |
| 30901 | "ushort16 __ovld __cnfn convert_ushort16_sat_rte(int16);\n" |
| 30902 | "ushort16 __ovld __cnfn convert_ushort16_rtz(int16);\n" |
| 30903 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtz(int16);\n" |
| 30904 | "ushort16 __ovld __cnfn convert_ushort16_rtp(int16);\n" |
| 30905 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtp(int16);\n" |
| 30906 | "ushort16 __ovld __cnfn convert_ushort16_rtn(int16);\n" |
| 30907 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtn(int16);\n" |
| 30908 | "ushort16 __ovld __cnfn convert_ushort16(int16);\n" |
| 30909 | "ushort16 __ovld __cnfn convert_ushort16_sat(int16);\n" |
| 30910 | "ushort16 __ovld __cnfn convert_ushort16_rte(uint16);\n" |
| 30911 | "ushort16 __ovld __cnfn convert_ushort16_sat_rte(uint16);\n" |
| 30912 | "ushort16 __ovld __cnfn convert_ushort16_rtz(uint16);\n" |
| 30913 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtz(uint16);\n" |
| 30914 | "ushort16 __ovld __cnfn convert_ushort16_rtp(uint16);\n" |
| 30915 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtp(uint16);\n" |
| 30916 | "ushort16 __ovld __cnfn convert_ushort16_rtn(uint16);\n" |
| 30917 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtn(uint16);\n" |
| 30918 | "ushort16 __ovld __cnfn convert_ushort16(uint16);\n" |
| 30919 | "ushort16 __ovld __cnfn convert_ushort16_sat(uint16);\n" |
| 30920 | "ushort16 __ovld __cnfn convert_ushort16_rte(long16);\n" |
| 30921 | "ushort16 __ovld __cnfn convert_ushort16_sat_rte(long16);\n" |
| 30922 | "ushort16 __ovld __cnfn convert_ushort16_rtz(long16);\n" |
| 30923 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtz(long16);\n" |
| 30924 | "ushort16 __ovld __cnfn convert_ushort16_rtp(long16);\n" |
| 30925 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtp(long16);\n" |
| 30926 | "ushort16 __ovld __cnfn convert_ushort16_rtn(long16);\n" |
| 30927 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtn(long16);\n" |
| 30928 | "ushort16 __ovld __cnfn convert_ushort16(long16);\n" |
| 30929 | "ushort16 __ovld __cnfn convert_ushort16_sat(long16);\n" |
| 30930 | "ushort16 __ovld __cnfn convert_ushort16_rte(ulong16);\n" |
| 30931 | "ushort16 __ovld __cnfn convert_ushort16_sat_rte(ulong16);\n" |
| 30932 | "ushort16 __ovld __cnfn convert_ushort16_rtz(ulong16);\n" |
| 30933 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtz(ulong16);\n" |
| 30934 | "ushort16 __ovld __cnfn convert_ushort16_rtp(ulong16);\n" |
| 30935 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtp(ulong16);\n" |
| 30936 | "ushort16 __ovld __cnfn convert_ushort16_rtn(ulong16);\n" |
| 30937 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtn(ulong16);\n" |
| 30938 | "ushort16 __ovld __cnfn convert_ushort16(ulong16);\n" |
| 30939 | "ushort16 __ovld __cnfn convert_ushort16_sat(ulong16);\n" |
| 30940 | "ushort16 __ovld __cnfn convert_ushort16_rte(float16);\n" |
| 30941 | "ushort16 __ovld __cnfn convert_ushort16_sat_rte(float16);\n" |
| 30942 | "ushort16 __ovld __cnfn convert_ushort16_rtz(float16);\n" |
| 30943 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtz(float16);\n" |
| 30944 | "ushort16 __ovld __cnfn convert_ushort16_rtp(float16);\n" |
| 30945 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtp(float16);\n" |
| 30946 | "ushort16 __ovld __cnfn convert_ushort16_rtn(float16);\n" |
| 30947 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtn(float16);\n" |
| 30948 | "ushort16 __ovld __cnfn convert_ushort16(float16);\n" |
| 30949 | "ushort16 __ovld __cnfn convert_ushort16_sat(float16);\n" |
| 30950 | "int16 __ovld __cnfn convert_int16_rte(char16);\n" |
| 30951 | "int16 __ovld __cnfn convert_int16_sat_rte(char16);\n" |
| 30952 | "int16 __ovld __cnfn convert_int16_rtz(char16);\n" |
| 30953 | "int16 __ovld __cnfn convert_int16_sat_rtz(char16);\n" |
| 30954 | "int16 __ovld __cnfn convert_int16_rtp(char16);\n" |
| 30955 | "int16 __ovld __cnfn convert_int16_sat_rtp(char16);\n" |
| 30956 | "int16 __ovld __cnfn convert_int16_rtn(char16);\n" |
| 30957 | "int16 __ovld __cnfn convert_int16_sat_rtn(char16);\n" |
| 30958 | "int16 __ovld __cnfn convert_int16(char16);\n" |
| 30959 | "int16 __ovld __cnfn convert_int16_sat(char16);\n" |
| 30960 | "int16 __ovld __cnfn convert_int16_rte(uchar16);\n" |
| 30961 | "int16 __ovld __cnfn convert_int16_sat_rte(uchar16);\n" |
| 30962 | "int16 __ovld __cnfn convert_int16_rtz(uchar16);\n" |
| 30963 | "int16 __ovld __cnfn convert_int16_sat_rtz(uchar16);\n" |
| 30964 | "int16 __ovld __cnfn convert_int16_rtp(uchar16);\n" |
| 30965 | "int16 __ovld __cnfn convert_int16_sat_rtp(uchar16);\n" |
| 30966 | "int16 __ovld __cnfn convert_int16_rtn(uchar16);\n" |
| 30967 | "int16 __ovld __cnfn convert_int16_sat_rtn(uchar16);\n" |
| 30968 | "int16 __ovld __cnfn convert_int16(uchar16);\n" |
| 30969 | "int16 __ovld __cnfn convert_int16_sat(uchar16);\n" |
| 30970 | "int16 __ovld __cnfn convert_int16_rte(short16);\n" |
| 30971 | "int16 __ovld __cnfn convert_int16_sat_rte(short16);\n" |
| 30972 | "int16 __ovld __cnfn convert_int16_rtz(short16);\n" |
| 30973 | "int16 __ovld __cnfn convert_int16_sat_rtz(short16);\n" |
| 30974 | "int16 __ovld __cnfn convert_int16_rtp(short16);\n" |
| 30975 | "int16 __ovld __cnfn convert_int16_sat_rtp(short16);\n" |
| 30976 | "int16 __ovld __cnfn convert_int16_rtn(short16);\n" |
| 30977 | "int16 __ovld __cnfn convert_int16_sat_rtn(short16);\n" |
| 30978 | "int16 __ovld __cnfn convert_int16(short16);\n" |
| 30979 | "int16 __ovld __cnfn convert_int16_sat(short16);\n" |
| 30980 | "int16 __ovld __cnfn convert_int16_rte(ushort16);\n" |
| 30981 | "int16 __ovld __cnfn convert_int16_sat_rte(ushort16);\n" |
| 30982 | "int16 __ovld __cnfn convert_int16_rtz(ushort16);\n" |
| 30983 | "int16 __ovld __cnfn convert_int16_sat_rtz(ushort16);\n" |
| 30984 | "int16 __ovld __cnfn convert_int16_rtp(ushort16);\n" |
| 30985 | "int16 __ovld __cnfn convert_int16_sat_rtp(ushort16);\n" |
| 30986 | "int16 __ovld __cnfn convert_int16_rtn(ushort16);\n" |
| 30987 | "int16 __ovld __cnfn convert_int16_sat_rtn(ushort16);\n" |
| 30988 | "int16 __ovld __cnfn convert_int16(ushort16);\n" |
| 30989 | "int16 __ovld __cnfn convert_int16_sat(ushort16);\n" |
| 30990 | "int16 __ovld __cnfn convert_int16_rte(int16);\n" |
| 30991 | "int16 __ovld __cnfn convert_int16_sat_rte(int16);\n" |
| 30992 | "int16 __ovld __cnfn convert_int16_rtz(int16);\n" |
| 30993 | "int16 __ovld __cnfn convert_int16_sat_rtz(int16);\n" |
| 30994 | "int16 __ovld __cnfn convert_int16_rtp(int16);\n" |
| 30995 | "int16 __ovld __cnfn convert_int16_sat_rtp(int16);\n" |
| 30996 | "int16 __ovld __cnfn convert_int16_rtn(int16);\n" |
| 30997 | "int16 __ovld __cnfn convert_int16_sat_rtn(int16);\n" |
| 30998 | "int16 __ovld __cnfn convert_int16(int16);\n" |
| 30999 | "int16 __ovld __cnfn convert_int16_sat(int16);\n" |
| 31000 | "int16 __ovld __cnfn convert_int16_rte(uint16);\n" |
| 31001 | "int16 __ovld __cnfn convert_int16_sat_rte(uint16);\n" |
| 31002 | "int16 __ovld __cnfn convert_int16_rtz(uint16);\n" |
| 31003 | "int16 __ovld __cnfn convert_int16_sat_rtz(uint16);\n" |
| 31004 | "int16 __ovld __cnfn convert_int16_rtp(uint16);\n" |
| 31005 | "int16 __ovld __cnfn convert_int16_sat_rtp(uint16);\n" |
| 31006 | "int16 __ovld __cnfn convert_int16_rtn(uint16);\n" |
| 31007 | "int16 __ovld __cnfn convert_int16_sat_rtn(uint16);\n" |
| 31008 | "int16 __ovld __cnfn convert_int16(uint16);\n" |
| 31009 | "int16 __ovld __cnfn convert_int16_sat(uint16);\n" |
| 31010 | "int16 __ovld __cnfn convert_int16_rte(long16);\n" |
| 31011 | "int16 __ovld __cnfn convert_int16_sat_rte(long16);\n" |
| 31012 | "int16 __ovld __cnfn convert_int16_rtz(long16);\n" |
| 31013 | "int16 __ovld __cnfn convert_int16_sat_rtz(long16);\n" |
| 31014 | "int16 __ovld __cnfn convert_int16_rtp(long16);\n" |
| 31015 | "int16 __ovld __cnfn convert_int16_sat_rtp(long16);\n" |
| 31016 | "int16 __ovld __cnfn convert_int16_rtn(long16);\n" |
| 31017 | "int16 __ovld __cnfn convert_int16_sat_rtn(long16);\n" |
| 31018 | "int16 __ovld __cnfn convert_int16(long16);\n" |
| 31019 | "int16 __ovld __cnfn convert_int16_sat(long16);\n" |
| 31020 | "int16 __ovld __cnfn convert_int16_rte(ulong16);\n" |
| 31021 | "int16 __ovld __cnfn convert_int16_sat_rte(ulong16);\n" |
| 31022 | "int16 __ovld __cnfn convert_int16_rtz(ulong16);\n" |
| 31023 | "int16 __ovld __cnfn convert_int16_sat_rtz(ulong16);\n" |
| 31024 | "int16 __ovld __cnfn convert_int16_rtp(ulong16);\n" |
| 31025 | "int16 __ovld __cnfn convert_int16_sat_rtp(ulong16);\n" |
| 31026 | "int16 __ovld __cnfn convert_int16_rtn(ulong16);\n" |
| 31027 | "int16 __ovld __cnfn convert_int16_sat_rtn(ulong16);\n" |
| 31028 | "int16 __ovld __cnfn convert_int16(ulong16);\n" |
| 31029 | "int16 __ovld __cnfn convert_int16_sat(ulong16);\n" |
| 31030 | "int16 __ovld __cnfn convert_int16_rte(float16);\n" |
| 31031 | "int16 __ovld __cnfn convert_int16_sat_rte(float16);\n" |
| 31032 | "int16 __ovld __cnfn convert_int16_rtz(float16);\n" |
| 31033 | "int16 __ovld __cnfn convert_int16_sat_rtz(float16);\n" |
| 31034 | "int16 __ovld __cnfn convert_int16_rtp(float16);\n" |
| 31035 | "int16 __ovld __cnfn convert_int16_sat_rtp(float16);\n" |
| 31036 | "int16 __ovld __cnfn convert_int16_rtn(float16);\n" |
| 31037 | "int16 __ovld __cnfn convert_int16_sat_rtn(float16);\n" |
| 31038 | "int16 __ovld __cnfn convert_int16(float16);\n" |
| 31039 | "int16 __ovld __cnfn convert_int16_sat(float16);\n" |
| 31040 | "uint16 __ovld __cnfn convert_uint16_rte(char16);\n" |
| 31041 | "uint16 __ovld __cnfn convert_uint16_sat_rte(char16);\n" |
| 31042 | "uint16 __ovld __cnfn convert_uint16_rtz(char16);\n" |
| 31043 | "uint16 __ovld __cnfn convert_uint16_sat_rtz(char16);\n" |
| 31044 | "uint16 __ovld __cnfn convert_uint16_rtp(char16);\n" |
| 31045 | "uint16 __ovld __cnfn convert_uint16_sat_rtp(char16);\n" |
| 31046 | "uint16 __ovld __cnfn convert_uint16_rtn(char16);\n" |
| 31047 | "uint16 __ovld __cnfn convert_uint16_sat_rtn(char16);\n" |
| 31048 | "uint16 __ovld __cnfn convert_uint16(char16);\n" |
| 31049 | "uint16 __ovld __cnfn convert_uint16_sat(char16);\n" |
| 31050 | "uint16 __ovld __cnfn convert_uint16_rte(uchar16);\n" |
| 31051 | "uint16 __ovld __cnfn convert_uint16_sat_rte(uchar16);\n" |
| 31052 | "uint16 __ovld __cnfn convert_uint16_rtz(uchar16);\n" |
| 31053 | "uint16 __ovld __cnfn convert_uint16_sat_rtz(uchar16);\n" |
| 31054 | "uint16 __ovld __cnfn convert_uint16_rtp(uchar16);\n" |
| 31055 | "uint16 __ovld __cnfn convert_uint16_sat_rtp(uchar16);\n" |
| 31056 | "uint16 __ovld __cnfn convert_uint16_rtn(uchar16);\n" |
| 31057 | "uint16 __ovld __cnfn convert_uint16_sat_rtn(uchar16);\n" |
| 31058 | "uint16 __ovld __cnfn convert_uint16(uchar16);\n" |
| 31059 | "uint16 __ovld __cnfn convert_uint16_sat(uchar16);\n" |
| 31060 | "uint16 __ovld __cnfn convert_uint16_rte(short16);\n" |
| 31061 | "uint16 __ovld __cnfn convert_uint16_sat_rte(short16);\n" |
| 31062 | "uint16 __ovld __cnfn convert_uint16_rtz(short16);\n" |
| 31063 | "uint16 __ovld __cnfn convert_uint16_sat_rtz(short16);\n" |
| 31064 | "uint16 __ovld __cnfn convert_uint16_rtp(short16);\n" |
| 31065 | "uint16 __ovld __cnfn convert_uint16_sat_rtp(short16);\n" |
| 31066 | "uint16 __ovld __cnfn convert_uint16_rtn(short16);\n" |
| 31067 | "uint16 __ovld __cnfn convert_uint16_sat_rtn(short16);\n" |
| 31068 | "uint16 __ovld __cnfn convert_uint16(short16);\n" |
| 31069 | "uint16 __ovld __cnfn convert_uint16_sat(short16);\n" |
| 31070 | "uint16 __ovld __cnfn convert_uint16_rte(ushort16);\n" |
| 31071 | "uint16 __ovld __cnfn convert_uint16_sat_rte(ushort16);\n" |
| 31072 | "uint16 __ovld __cnfn convert_uint16_rtz(ushort16);\n" |
| 31073 | "uint16 __ovld __cnfn convert_uint16_sat_rtz(ushort16);\n" |
| 31074 | "uint16 __ovld __cnfn convert_uint16_rtp(ushort16);\n" |
| 31075 | "uint16 __ovld __cnfn convert_uint16_sat_rtp(ushort16);\n" |
| 31076 | "uint16 __ovld __cnfn convert_uint16_rtn(ushort16);\n" |
| 31077 | "uint16 __ovld __cnfn convert_uint16_sat_rtn(ushort16);\n" |
| 31078 | "uint16 __ovld __cnfn convert_uint16(ushort16);\n" |
| 31079 | "uint16 __ovld __cnfn convert_uint16_sat(ushort16);\n" |
| 31080 | "uint16 __ovld __cnfn convert_uint16_rte(int16);\n" |
| 31081 | "uint16 __ovld __cnfn convert_uint16_sat_rte(int16);\n" |
| 31082 | "uint16 __ovld __cnfn convert_uint16_rtz(int16);\n" |
| 31083 | "uint16 __ovld __cnfn convert_uint16_sat_rtz(int16);\n" |
| 31084 | "uint16 __ovld __cnfn convert_uint16_rtp(int16);\n" |
| 31085 | "uint16 __ovld __cnfn convert_uint16_sat_rtp(int16);\n" |
| 31086 | "uint16 __ovld __cnfn convert_uint16_rtn(int16);\n" |
| 31087 | "uint16 __ovld __cnfn convert_uint16_sat_rtn(int16);\n" |
| 31088 | "uint16 __ovld __cnfn convert_uint16(int16);\n" |
| 31089 | "uint16 __ovld __cnfn convert_uint16_sat(int16);\n" |
| 31090 | "uint16 __ovld __cnfn convert_uint16_rte(uint16);\n" |
| 31091 | "uint16 __ovld __cnfn convert_uint16_sat_rte(uint16);\n" |
| 31092 | "uint16 __ovld __cnfn convert_uint16_rtz(uint16);\n" |
| 31093 | "uint16 __ovld __cnfn convert_uint16_sat_rtz(uint16);\n" |
| 31094 | "uint16 __ovld __cnfn convert_uint16_rtp(uint16);\n" |
| 31095 | "uint16 __ovld __cnfn convert_uint16_sat_rtp(uint16);\n" |
| 31096 | "uint16 __ovld __cnfn convert_uint16_rtn(uint16);\n" |
| 31097 | "uint16 __ovld __cnfn convert_uint16_sat_rtn(uint16);\n" |
| 31098 | "uint16 __ovld __cnfn convert_uint16(uint16);\n" |
| 31099 | "uint16 __ovld __cnfn convert_uint16_sat(uint16);\n" |
| 31100 | "uint16 __ovld __cnfn convert_uint16_rte(long16);\n" |
| 31101 | "uint16 __ovld __cnfn convert_uint16_sat_rte(long16);\n" |
| 31102 | "uint16 __ovld __cnfn convert_uint16_rtz(long16);\n" |
| 31103 | "uint16 __ovld __cnfn convert_uint16_sat_rtz(long16);\n" |
| 31104 | "uint16 __ovld __cnfn convert_uint16_rtp(long16);\n" |
| 31105 | "uint16 __ovld __cnfn convert_uint16_sat_rtp(long16);\n" |
| 31106 | "uint16 __ovld __cnfn convert_uint16_rtn(long16);\n" |
| 31107 | "uint16 __ovld __cnfn convert_uint16_sat_rtn(long16);\n" |
| 31108 | "uint16 __ovld __cnfn convert_uint16(long16);\n" |
| 31109 | "uint16 __ovld __cnfn convert_uint16_sat(long16);\n" |
| 31110 | "uint16 __ovld __cnfn convert_uint16_rte(ulong16);\n" |
| 31111 | "uint16 __ovld __cnfn convert_uint16_sat_rte(ulong16);\n" |
| 31112 | "uint16 __ovld __cnfn convert_uint16_rtz(ulong16);\n" |
| 31113 | "uint16 __ovld __cnfn convert_uint16_sat_rtz(ulong16);\n" |
| 31114 | "uint16 __ovld __cnfn convert_uint16_rtp(ulong16);\n" |
| 31115 | "uint16 __ovld __cnfn convert_uint16_sat_rtp(ulong16);\n" |
| 31116 | "uint16 __ovld __cnfn convert_uint16_rtn(ulong16);\n" |
| 31117 | "uint16 __ovld __cnfn convert_uint16_sat_rtn(ulong16);\n" |
| 31118 | "uint16 __ovld __cnfn convert_uint16(ulong16);\n" |
| 31119 | "uint16 __ovld __cnfn convert_uint16_sat(ulong16);\n" |
| 31120 | "uint16 __ovld __cnfn convert_uint16_rte(float16);\n" |
| 31121 | "uint16 __ovld __cnfn convert_uint16_sat_rte(float16);\n" |
| 31122 | "uint16 __ovld __cnfn convert_uint16_rtz(float16);\n" |
| 31123 | "uint16 __ovld __cnfn convert_uint16_sat_rtz(float16);\n" |
| 31124 | "uint16 __ovld __cnfn convert_uint16_rtp(float16);\n" |
| 31125 | "uint16 __ovld __cnfn convert_uint16_sat_rtp(float16);\n" |
| 31126 | "uint16 __ovld __cnfn convert_uint16_rtn(float16);\n" |
| 31127 | "uint16 __ovld __cnfn convert_uint16_sat_rtn(float16);\n" |
| 31128 | "uint16 __ovld __cnfn convert_uint16(float16);\n" |
| 31129 | "uint16 __ovld __cnfn convert_uint16_sat(float16);\n" |
| 31130 | "long16 __ovld __cnfn convert_long16_rte(char16);\n" |
| 31131 | "long16 __ovld __cnfn convert_long16_sat_rte(char16);\n" |
| 31132 | "long16 __ovld __cnfn convert_long16_rtz(char16);\n" |
| 31133 | "long16 __ovld __cnfn convert_long16_sat_rtz(char16);\n" |
| 31134 | "long16 __ovld __cnfn convert_long16_rtp(char16);\n" |
| 31135 | "long16 __ovld __cnfn convert_long16_sat_rtp(char16);\n" |
| 31136 | "long16 __ovld __cnfn convert_long16_rtn(char16);\n" |
| 31137 | "long16 __ovld __cnfn convert_long16_sat_rtn(char16);\n" |
| 31138 | "long16 __ovld __cnfn convert_long16(char16);\n" |
| 31139 | "long16 __ovld __cnfn convert_long16_sat(char16);\n" |
| 31140 | "long16 __ovld __cnfn convert_long16_rte(uchar16);\n" |
| 31141 | "long16 __ovld __cnfn convert_long16_sat_rte(uchar16);\n" |
| 31142 | "long16 __ovld __cnfn convert_long16_rtz(uchar16);\n" |
| 31143 | "long16 __ovld __cnfn convert_long16_sat_rtz(uchar16);\n" |
| 31144 | "long16 __ovld __cnfn convert_long16_rtp(uchar16);\n" |
| 31145 | "long16 __ovld __cnfn convert_long16_sat_rtp(uchar16);\n" |
| 31146 | "long16 __ovld __cnfn convert_long16_rtn(uchar16);\n" |
| 31147 | "long16 __ovld __cnfn convert_long16_sat_rtn(uchar16);\n" |
| 31148 | "long16 __ovld __cnfn convert_long16(uchar16);\n" |
| 31149 | "long16 __ovld __cnfn convert_long16_sat(uchar16);\n" |
| 31150 | "long16 __ovld __cnfn convert_long16_rte(short16);\n" |
| 31151 | "long16 __ovld __cnfn convert_long16_sat_rte(short16);\n" |
| 31152 | "long16 __ovld __cnfn convert_long16_rtz(short16);\n" |
| 31153 | "long16 __ovld __cnfn convert_long16_sat_rtz(short16);\n" |
| 31154 | "long16 __ovld __cnfn convert_long16_rtp(short16);\n" |
| 31155 | "long16 __ovld __cnfn convert_long16_sat_rtp(short16);\n" |
| 31156 | "long16 __ovld __cnfn convert_long16_rtn(short16);\n" |
| 31157 | "long16 __ovld __cnfn convert_long16_sat_rtn(short16);\n" |
| 31158 | "long16 __ovld __cnfn convert_long16(short16);\n" |
| 31159 | "long16 __ovld __cnfn convert_long16_sat(short16);\n" |
| 31160 | "long16 __ovld __cnfn convert_long16_rte(ushort16);\n" |
| 31161 | "long16 __ovld __cnfn convert_long16_sat_rte(ushort16);\n" |
| 31162 | "long16 __ovld __cnfn convert_long16_rtz(ushort16);\n" |
| 31163 | "long16 __ovld __cnfn convert_long16_sat_rtz(ushort16);\n" |
| 31164 | "long16 __ovld __cnfn convert_long16_rtp(ushort16);\n" |
| 31165 | "long16 __ovld __cnfn convert_long16_sat_rtp(ushort16);\n" |
| 31166 | "long16 __ovld __cnfn convert_long16_rtn(ushort16);\n" |
| 31167 | "long16 __ovld __cnfn convert_long16_sat_rtn(ushort16);\n" |
| 31168 | "long16 __ovld __cnfn convert_long16(ushort16);\n" |
| 31169 | "long16 __ovld __cnfn convert_long16_sat(ushort16);\n" |
| 31170 | "long16 __ovld __cnfn convert_long16_rte(int16);\n" |
| 31171 | "long16 __ovld __cnfn convert_long16_sat_rte(int16);\n" |
| 31172 | "long16 __ovld __cnfn convert_long16_rtz(int16);\n" |
| 31173 | "long16 __ovld __cnfn convert_long16_sat_rtz(int16);\n" |
| 31174 | "long16 __ovld __cnfn convert_long16_rtp(int16);\n" |
| 31175 | "long16 __ovld __cnfn convert_long16_sat_rtp(int16);\n" |
| 31176 | "long16 __ovld __cnfn convert_long16_rtn(int16);\n" |
| 31177 | "long16 __ovld __cnfn convert_long16_sat_rtn(int16);\n" |
| 31178 | "long16 __ovld __cnfn convert_long16(int16);\n" |
| 31179 | "long16 __ovld __cnfn convert_long16_sat(int16);\n" |
| 31180 | "long16 __ovld __cnfn convert_long16_rte(uint16);\n" |
| 31181 | "long16 __ovld __cnfn convert_long16_sat_rte(uint16);\n" |
| 31182 | "long16 __ovld __cnfn convert_long16_rtz(uint16);\n" |
| 31183 | "long16 __ovld __cnfn convert_long16_sat_rtz(uint16);\n" |
| 31184 | "long16 __ovld __cnfn convert_long16_rtp(uint16);\n" |
| 31185 | "long16 __ovld __cnfn convert_long16_sat_rtp(uint16);\n" |
| 31186 | "long16 __ovld __cnfn convert_long16_rtn(uint16);\n" |
| 31187 | "long16 __ovld __cnfn convert_long16_sat_rtn(uint16);\n" |
| 31188 | "long16 __ovld __cnfn convert_long16(uint16);\n" |
| 31189 | "long16 __ovld __cnfn convert_long16_sat(uint16);\n" |
| 31190 | "long16 __ovld __cnfn convert_long16_rte(long16);\n" |
| 31191 | "long16 __ovld __cnfn convert_long16_sat_rte(long16);\n" |
| 31192 | "long16 __ovld __cnfn convert_long16_rtz(long16);\n" |
| 31193 | "long16 __ovld __cnfn convert_long16_sat_rtz(long16);\n" |
| 31194 | "long16 __ovld __cnfn convert_long16_rtp(long16);\n" |
| 31195 | "long16 __ovld __cnfn convert_long16_sat_rtp(long16);\n" |
| 31196 | "long16 __ovld __cnfn convert_long16_rtn(long16);\n" |
| 31197 | "long16 __ovld __cnfn convert_long16_sat_rtn(long16);\n" |
| 31198 | "long16 __ovld __cnfn convert_long16(long16);\n" |
| 31199 | "long16 __ovld __cnfn convert_long16_sat(long16);\n" |
| 31200 | "long16 __ovld __cnfn convert_long16_rte(ulong16);\n" |
| 31201 | "long16 __ovld __cnfn convert_long16_sat_rte(ulong16);\n" |
| 31202 | "long16 __ovld __cnfn convert_long16_rtz(ulong16);\n" |
| 31203 | "long16 __ovld __cnfn convert_long16_sat_rtz(ulong16);\n" |
| 31204 | "long16 __ovld __cnfn convert_long16_rtp(ulong16);\n" |
| 31205 | "long16 __ovld __cnfn convert_long16_sat_rtp(ulong16);\n" |
| 31206 | "long16 __ovld __cnfn convert_long16_rtn(ulong16);\n" |
| 31207 | "long16 __ovld __cnfn convert_long16_sat_rtn(ulong16);\n" |
| 31208 | "long16 __ovld __cnfn convert_long16(ulong16);\n" |
| 31209 | "long16 __ovld __cnfn convert_long16_sat(ulong16);\n" |
| 31210 | "long16 __ovld __cnfn convert_long16_rte(float16);\n" |
| 31211 | "long16 __ovld __cnfn convert_long16_sat_rte(float16);\n" |
| 31212 | "long16 __ovld __cnfn convert_long16_rtz(float16);\n" |
| 31213 | "long16 __ovld __cnfn convert_long16_sat_rtz(float16);\n" |
| 31214 | "long16 __ovld __cnfn convert_long16_rtp(float16);\n" |
| 31215 | "long16 __ovld __cnfn convert_long16_sat_rtp(float16);\n" |
| 31216 | "long16 __ovld __cnfn convert_long16_rtn(float16);\n" |
| 31217 | "long16 __ovld __cnfn convert_long16_sat_rtn(float16);\n" |
| 31218 | "long16 __ovld __cnfn convert_long16(float16);\n" |
| 31219 | "long16 __ovld __cnfn convert_long16_sat(float16);\n" |
| 31220 | "ulong16 __ovld __cnfn convert_ulong16_rte(char16);\n" |
| 31221 | "ulong16 __ovld __cnfn convert_ulong16_sat_rte(char16);\n" |
| 31222 | "ulong16 __ovld __cnfn convert_ulong16_rtz(char16);\n" |
| 31223 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtz(char16);\n" |
| 31224 | "ulong16 __ovld __cnfn convert_ulong16_rtp(char16);\n" |
| 31225 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtp(char16);\n" |
| 31226 | "ulong16 __ovld __cnfn convert_ulong16_rtn(char16);\n" |
| 31227 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtn(char16);\n" |
| 31228 | "ulong16 __ovld __cnfn convert_ulong16(char16);\n" |
| 31229 | "ulong16 __ovld __cnfn convert_ulong16_sat(char16);\n" |
| 31230 | "ulong16 __ovld __cnfn convert_ulong16_rte(uchar16);\n" |
| 31231 | "ulong16 __ovld __cnfn convert_ulong16_sat_rte(uchar16);\n" |
| 31232 | "ulong16 __ovld __cnfn convert_ulong16_rtz(uchar16);\n" |
| 31233 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtz(uchar16);\n" |
| 31234 | "ulong16 __ovld __cnfn convert_ulong16_rtp(uchar16);\n" |
| 31235 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtp(uchar16);\n" |
| 31236 | "ulong16 __ovld __cnfn convert_ulong16_rtn(uchar16);\n" |
| 31237 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtn(uchar16);\n" |
| 31238 | "ulong16 __ovld __cnfn convert_ulong16(uchar16);\n" |
| 31239 | "ulong16 __ovld __cnfn convert_ulong16_sat(uchar16);\n" |
| 31240 | "ulong16 __ovld __cnfn convert_ulong16_rte(short16);\n" |
| 31241 | "ulong16 __ovld __cnfn convert_ulong16_sat_rte(short16);\n" |
| 31242 | "ulong16 __ovld __cnfn convert_ulong16_rtz(short16);\n" |
| 31243 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtz(short16);\n" |
| 31244 | "ulong16 __ovld __cnfn convert_ulong16_rtp(short16);\n" |
| 31245 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtp(short16);\n" |
| 31246 | "ulong16 __ovld __cnfn convert_ulong16_rtn(short16);\n" |
| 31247 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtn(short16);\n" |
| 31248 | "ulong16 __ovld __cnfn convert_ulong16(short16);\n" |
| 31249 | "ulong16 __ovld __cnfn convert_ulong16_sat(short16);\n" |
| 31250 | "ulong16 __ovld __cnfn convert_ulong16_rte(ushort16);\n" |
| 31251 | "ulong16 __ovld __cnfn convert_ulong16_sat_rte(ushort16);\n" |
| 31252 | "ulong16 __ovld __cnfn convert_ulong16_rtz(ushort16);\n" |
| 31253 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtz(ushort16);\n" |
| 31254 | "ulong16 __ovld __cnfn convert_ulong16_rtp(ushort16);\n" |
| 31255 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtp(ushort16);\n" |
| 31256 | "ulong16 __ovld __cnfn convert_ulong16_rtn(ushort16);\n" |
| 31257 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtn(ushort16);\n" |
| 31258 | "ulong16 __ovld __cnfn convert_ulong16(ushort16);\n" |
| 31259 | "ulong16 __ovld __cnfn convert_ulong16_sat(ushort16);\n" |
| 31260 | "ulong16 __ovld __cnfn convert_ulong16_rte(int16);\n" |
| 31261 | "ulong16 __ovld __cnfn convert_ulong16_sat_rte(int16);\n" |
| 31262 | "ulong16 __ovld __cnfn convert_ulong16_rtz(int16);\n" |
| 31263 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtz(int16);\n" |
| 31264 | "ulong16 __ovld __cnfn convert_ulong16_rtp(int16);\n" |
| 31265 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtp(int16);\n" |
| 31266 | "ulong16 __ovld __cnfn convert_ulong16_rtn(int16);\n" |
| 31267 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtn(int16);\n" |
| 31268 | "ulong16 __ovld __cnfn convert_ulong16(int16);\n" |
| 31269 | "ulong16 __ovld __cnfn convert_ulong16_sat(int16);\n" |
| 31270 | "ulong16 __ovld __cnfn convert_ulong16_rte(uint16);\n" |
| 31271 | "ulong16 __ovld __cnfn convert_ulong16_sat_rte(uint16);\n" |
| 31272 | "ulong16 __ovld __cnfn convert_ulong16_rtz(uint16);\n" |
| 31273 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtz(uint16);\n" |
| 31274 | "ulong16 __ovld __cnfn convert_ulong16_rtp(uint16);\n" |
| 31275 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtp(uint16);\n" |
| 31276 | "ulong16 __ovld __cnfn convert_ulong16_rtn(uint16);\n" |
| 31277 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtn(uint16);\n" |
| 31278 | "ulong16 __ovld __cnfn convert_ulong16(uint16);\n" |
| 31279 | "ulong16 __ovld __cnfn convert_ulong16_sat(uint16);\n" |
| 31280 | "ulong16 __ovld __cnfn convert_ulong16_rte(long16);\n" |
| 31281 | "ulong16 __ovld __cnfn convert_ulong16_sat_rte(long16);\n" |
| 31282 | "ulong16 __ovld __cnfn convert_ulong16_rtz(long16);\n" |
| 31283 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtz(long16);\n" |
| 31284 | "ulong16 __ovld __cnfn convert_ulong16_rtp(long16);\n" |
| 31285 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtp(long16);\n" |
| 31286 | "ulong16 __ovld __cnfn convert_ulong16_rtn(long16);\n" |
| 31287 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtn(long16);\n" |
| 31288 | "ulong16 __ovld __cnfn convert_ulong16(long16);\n" |
| 31289 | "ulong16 __ovld __cnfn convert_ulong16_sat(long16);\n" |
| 31290 | "ulong16 __ovld __cnfn convert_ulong16_rte(ulong16);\n" |
| 31291 | "ulong16 __ovld __cnfn convert_ulong16_sat_rte(ulong16);\n" |
| 31292 | "ulong16 __ovld __cnfn convert_ulong16_rtz(ulong16);\n" |
| 31293 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtz(ulong16);\n" |
| 31294 | "ulong16 __ovld __cnfn convert_ulong16_rtp(ulong16);\n" |
| 31295 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtp(ulong16);\n" |
| 31296 | "ulong16 __ovld __cnfn convert_ulong16_rtn(ulong16);\n" |
| 31297 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtn(ulong16);\n" |
| 31298 | "ulong16 __ovld __cnfn convert_ulong16(ulong16);\n" |
| 31299 | "ulong16 __ovld __cnfn convert_ulong16_sat(ulong16);\n" |
| 31300 | "ulong16 __ovld __cnfn convert_ulong16_rte(float16);\n" |
| 31301 | "ulong16 __ovld __cnfn convert_ulong16_sat_rte(float16);\n" |
| 31302 | "ulong16 __ovld __cnfn convert_ulong16_rtz(float16);\n" |
| 31303 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtz(float16);\n" |
| 31304 | "ulong16 __ovld __cnfn convert_ulong16_rtp(float16);\n" |
| 31305 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtp(float16);\n" |
| 31306 | "ulong16 __ovld __cnfn convert_ulong16_rtn(float16);\n" |
| 31307 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtn(float16);\n" |
| 31308 | "ulong16 __ovld __cnfn convert_ulong16(float16);\n" |
| 31309 | "ulong16 __ovld __cnfn convert_ulong16_sat(float16);\n" |
| 31310 | "float16 __ovld __cnfn convert_float16_rte(char16);\n" |
| 31311 | "float16 __ovld __cnfn convert_float16_rtz(char16);\n" |
| 31312 | "float16 __ovld __cnfn convert_float16_rtp(char16);\n" |
| 31313 | "float16 __ovld __cnfn convert_float16_rtn(char16);\n" |
| 31314 | "float16 __ovld __cnfn convert_float16(char16);\n" |
| 31315 | "float16 __ovld __cnfn convert_float16_rte(uchar16);\n" |
| 31316 | "float16 __ovld __cnfn convert_float16_rtz(uchar16);\n" |
| 31317 | "float16 __ovld __cnfn convert_float16_rtp(uchar16);\n" |
| 31318 | "float16 __ovld __cnfn convert_float16_rtn(uchar16);\n" |
| 31319 | "float16 __ovld __cnfn convert_float16(uchar16);\n" |
| 31320 | "float16 __ovld __cnfn convert_float16_rte(short16);\n" |
| 31321 | "float16 __ovld __cnfn convert_float16_rtz(short16);\n" |
| 31322 | "float16 __ovld __cnfn convert_float16_rtp(short16);\n" |
| 31323 | "float16 __ovld __cnfn convert_float16_rtn(short16);\n" |
| 31324 | "float16 __ovld __cnfn convert_float16(short16);\n" |
| 31325 | "float16 __ovld __cnfn convert_float16_rte(ushort16);\n" |
| 31326 | "float16 __ovld __cnfn convert_float16_rtz(ushort16);\n" |
| 31327 | "float16 __ovld __cnfn convert_float16_rtp(ushort16);\n" |
| 31328 | "float16 __ovld __cnfn convert_float16_rtn(ushort16);\n" |
| 31329 | "float16 __ovld __cnfn convert_float16(ushort16);\n" |
| 31330 | "float16 __ovld __cnfn convert_float16_rte(int16);\n" |
| 31331 | "float16 __ovld __cnfn convert_float16_rtz(int16);\n" |
| 31332 | "float16 __ovld __cnfn convert_float16_rtp(int16);\n" |
| 31333 | "float16 __ovld __cnfn convert_float16_rtn(int16);\n" |
| 31334 | "float16 __ovld __cnfn convert_float16(int16);\n" |
| 31335 | "float16 __ovld __cnfn convert_float16_rte(uint16);\n" |
| 31336 | "float16 __ovld __cnfn convert_float16_rtz(uint16);\n" |
| 31337 | "float16 __ovld __cnfn convert_float16_rtp(uint16);\n" |
| 31338 | "float16 __ovld __cnfn convert_float16_rtn(uint16);\n" |
| 31339 | "float16 __ovld __cnfn convert_float16(uint16);\n" |
| 31340 | "float16 __ovld __cnfn convert_float16_rte(long16);\n" |
| 31341 | "float16 __ovld __cnfn convert_float16_rtz(long16);\n" |
| 31342 | "float16 __ovld __cnfn convert_float16_rtp(long16);\n" |
| 31343 | "float16 __ovld __cnfn convert_float16_rtn(long16);\n" |
| 31344 | "float16 __ovld __cnfn convert_float16(long16);\n" |
| 31345 | "float16 __ovld __cnfn convert_float16_rte(ulong16);\n" |
| 31346 | "float16 __ovld __cnfn convert_float16_rtz(ulong16);\n" |
| 31347 | "float16 __ovld __cnfn convert_float16_rtp(ulong16);\n" |
| 31348 | "float16 __ovld __cnfn convert_float16_rtn(ulong16);\n" |
| 31349 | "float16 __ovld __cnfn convert_float16(ulong16);\n" |
| 31350 | "float16 __ovld __cnfn convert_float16_rte(float16);\n" |
| 31351 | "float16 __ovld __cnfn convert_float16_rtz(float16);\n" |
| 31352 | "float16 __ovld __cnfn convert_float16_rtp(float16);\n" |
| 31353 | "float16 __ovld __cnfn convert_float16_rtn(float16);\n" |
| 31354 | "float16 __ovld __cnfn convert_float16(float16);\n" |
| 31355 | "\n" |
| 31356 | "// Conversions with double data type parameters or return value.\n" |
| 31357 | "\n" |
| 31358 | "#ifdef cl_khr_fp64\n" |
| 31359 | "char __ovld __cnfn convert_char(double);\n" |
| 31360 | "char __ovld __cnfn convert_char_rte(double);\n" |
| 31361 | "char __ovld __cnfn convert_char_rtn(double);\n" |
| 31362 | "char __ovld __cnfn convert_char_rtp(double);\n" |
| 31363 | "char __ovld __cnfn convert_char_rtz(double);\n" |
| 31364 | "char __ovld __cnfn convert_char_sat(double);\n" |
| 31365 | "char __ovld __cnfn convert_char_sat_rte(double);\n" |
| 31366 | "char __ovld __cnfn convert_char_sat_rtn(double);\n" |
| 31367 | "char __ovld __cnfn convert_char_sat_rtp(double);\n" |
| 31368 | "char __ovld __cnfn convert_char_sat_rtz(double);\n" |
| 31369 | "char2 __ovld __cnfn convert_char2(double2);\n" |
| 31370 | "char2 __ovld __cnfn convert_char2_rte(double2);\n" |
| 31371 | "char2 __ovld __cnfn convert_char2_rtn(double2);\n" |
| 31372 | "char2 __ovld __cnfn convert_char2_rtp(double2);\n" |
| 31373 | "char2 __ovld __cnfn convert_char2_rtz(double2);\n" |
| 31374 | "char2 __ovld __cnfn convert_char2_sat(double2);\n" |
| 31375 | "char2 __ovld __cnfn convert_char2_sat_rte(double2);\n" |
| 31376 | "char2 __ovld __cnfn convert_char2_sat_rtn(double2);\n" |
| 31377 | "char2 __ovld __cnfn convert_char2_sat_rtp(double2);\n" |
| 31378 | "char2 __ovld __cnfn convert_char2_sat_rtz(double2);\n" |
| 31379 | "char3 __ovld __cnfn convert_char3(double3);\n" |
| 31380 | "char3 __ovld __cnfn convert_char3_rte(double3);\n" |
| 31381 | "char3 __ovld __cnfn convert_char3_rtn(double3);\n" |
| 31382 | "char3 __ovld __cnfn convert_char3_rtp(double3);\n" |
| 31383 | "char3 __ovld __cnfn convert_char3_rtz(double3);\n" |
| 31384 | "char3 __ovld __cnfn convert_char3_sat(double3);\n" |
| 31385 | "char3 __ovld __cnfn convert_char3_sat_rte(double3);\n" |
| 31386 | "char3 __ovld __cnfn convert_char3_sat_rtn(double3);\n" |
| 31387 | "char3 __ovld __cnfn convert_char3_sat_rtp(double3);\n" |
| 31388 | "char3 __ovld __cnfn convert_char3_sat_rtz(double3);\n" |
| 31389 | "char4 __ovld __cnfn convert_char4(double4);\n" |
| 31390 | "char4 __ovld __cnfn convert_char4_rte(double4);\n" |
| 31391 | "char4 __ovld __cnfn convert_char4_rtn(double4);\n" |
| 31392 | "char4 __ovld __cnfn convert_char4_rtp(double4);\n" |
| 31393 | "char4 __ovld __cnfn convert_char4_rtz(double4);\n" |
| 31394 | "char4 __ovld __cnfn convert_char4_sat(double4);\n" |
| 31395 | "char4 __ovld __cnfn convert_char4_sat_rte(double4);\n" |
| 31396 | "char4 __ovld __cnfn convert_char4_sat_rtn(double4);\n" |
| 31397 | "char4 __ovld __cnfn convert_char4_sat_rtp(double4);\n" |
| 31398 | "char4 __ovld __cnfn convert_char4_sat_rtz(double4);\n" |
| 31399 | "char8 __ovld __cnfn convert_char8(double8);\n" |
| 31400 | "char8 __ovld __cnfn convert_char8_rte(double8);\n" |
| 31401 | "char8 __ovld __cnfn convert_char8_rtn(double8);\n" |
| 31402 | "char8 __ovld __cnfn convert_char8_rtp(double8);\n" |
| 31403 | "char8 __ovld __cnfn convert_char8_rtz(double8);\n" |
| 31404 | "char8 __ovld __cnfn convert_char8_sat(double8);\n" |
| 31405 | "char8 __ovld __cnfn convert_char8_sat_rte(double8);\n" |
| 31406 | "char8 __ovld __cnfn convert_char8_sat_rtn(double8);\n" |
| 31407 | "char8 __ovld __cnfn convert_char8_sat_rtp(double8);\n" |
| 31408 | "char8 __ovld __cnfn convert_char8_sat_rtz(double8);\n" |
| 31409 | "char16 __ovld __cnfn convert_char16(double16);\n" |
| 31410 | "char16 __ovld __cnfn convert_char16_rte(double16);\n" |
| 31411 | "char16 __ovld __cnfn convert_char16_rtn(double16);\n" |
| 31412 | "char16 __ovld __cnfn convert_char16_rtp(double16);\n" |
| 31413 | "char16 __ovld __cnfn convert_char16_rtz(double16);\n" |
| 31414 | "char16 __ovld __cnfn convert_char16_sat(double16);\n" |
| 31415 | "char16 __ovld __cnfn convert_char16_sat_rte(double16);\n" |
| 31416 | "char16 __ovld __cnfn convert_char16_sat_rtn(double16);\n" |
| 31417 | "char16 __ovld __cnfn convert_char16_sat_rtp(double16);\n" |
| 31418 | "char16 __ovld __cnfn convert_char16_sat_rtz(double16);\n" |
| 31419 | "\n" |
| 31420 | "uchar __ovld __cnfn convert_uchar(double);\n" |
| 31421 | "uchar __ovld __cnfn convert_uchar_rte(double);\n" |
| 31422 | "uchar __ovld __cnfn convert_uchar_rtn(double);\n" |
| 31423 | "uchar __ovld __cnfn convert_uchar_rtp(double);\n" |
| 31424 | "uchar __ovld __cnfn convert_uchar_rtz(double);\n" |
| 31425 | "uchar __ovld __cnfn convert_uchar_sat(double);\n" |
| 31426 | "uchar __ovld __cnfn convert_uchar_sat_rte(double);\n" |
| 31427 | "uchar __ovld __cnfn convert_uchar_sat_rtn(double);\n" |
| 31428 | "uchar __ovld __cnfn convert_uchar_sat_rtp(double);\n" |
| 31429 | "uchar __ovld __cnfn convert_uchar_sat_rtz(double);\n" |
| 31430 | "uchar2 __ovld __cnfn convert_uchar2(double2);\n" |
| 31431 | "uchar2 __ovld __cnfn convert_uchar2_rte(double2);\n" |
| 31432 | "uchar2 __ovld __cnfn convert_uchar2_rtn(double2);\n" |
| 31433 | "uchar2 __ovld __cnfn convert_uchar2_rtp(double2);\n" |
| 31434 | "uchar2 __ovld __cnfn convert_uchar2_rtz(double2);\n" |
| 31435 | "uchar2 __ovld __cnfn convert_uchar2_sat(double2);\n" |
| 31436 | "uchar2 __ovld __cnfn convert_uchar2_sat_rte(double2);\n" |
| 31437 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtn(double2);\n" |
| 31438 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtp(double2);\n" |
| 31439 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtz(double2);\n" |
| 31440 | "uchar3 __ovld __cnfn convert_uchar3(double3);\n" |
| 31441 | "uchar3 __ovld __cnfn convert_uchar3_rte(double3);\n" |
| 31442 | "uchar3 __ovld __cnfn convert_uchar3_rtn(double3);\n" |
| 31443 | "uchar3 __ovld __cnfn convert_uchar3_rtp(double3);\n" |
| 31444 | "uchar3 __ovld __cnfn convert_uchar3_rtz(double3);\n" |
| 31445 | "uchar3 __ovld __cnfn convert_uchar3_sat(double3);\n" |
| 31446 | "uchar3 __ovld __cnfn convert_uchar3_sat_rte(double3);\n" |
| 31447 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtn(double3);\n" |
| 31448 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtp(double3);\n" |
| 31449 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtz(double3);\n" |
| 31450 | "uchar4 __ovld __cnfn convert_uchar4(double4);\n" |
| 31451 | "uchar4 __ovld __cnfn convert_uchar4_rte(double4);\n" |
| 31452 | "uchar4 __ovld __cnfn convert_uchar4_rtn(double4);\n" |
| 31453 | "uchar4 __ovld __cnfn convert_uchar4_rtp(double4);\n" |
| 31454 | "uchar4 __ovld __cnfn convert_uchar4_rtz(double4);\n" |
| 31455 | "uchar4 __ovld __cnfn convert_uchar4_sat(double4);\n" |
| 31456 | "uchar4 __ovld __cnfn convert_uchar4_sat_rte(double4);\n" |
| 31457 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtn(double4);\n" |
| 31458 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtp(double4);\n" |
| 31459 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtz(double4);\n" |
| 31460 | "uchar8 __ovld __cnfn convert_uchar8(double8);\n" |
| 31461 | "uchar8 __ovld __cnfn convert_uchar8_rte(double8);\n" |
| 31462 | "uchar8 __ovld __cnfn convert_uchar8_rtn(double8);\n" |
| 31463 | "uchar8 __ovld __cnfn convert_uchar8_rtp(double8);\n" |
| 31464 | "uchar8 __ovld __cnfn convert_uchar8_rtz(double8);\n" |
| 31465 | "uchar8 __ovld __cnfn convert_uchar8_sat(double8);\n" |
| 31466 | "uchar8 __ovld __cnfn convert_uchar8_sat_rte(double8);\n" |
| 31467 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtn(double8);\n" |
| 31468 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtp(double8);\n" |
| 31469 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtz(double8);\n" |
| 31470 | "uchar16 __ovld __cnfn convert_uchar16(double16);\n" |
| 31471 | "uchar16 __ovld __cnfn convert_uchar16_rte(double16);\n" |
| 31472 | "uchar16 __ovld __cnfn convert_uchar16_rtn(double16);\n" |
| 31473 | "uchar16 __ovld __cnfn convert_uchar16_rtp(double16);\n" |
| 31474 | "uchar16 __ovld __cnfn convert_uchar16_rtz(double16);\n" |
| 31475 | "uchar16 __ovld __cnfn convert_uchar16_sat(double16);\n" |
| 31476 | "uchar16 __ovld __cnfn convert_uchar16_sat_rte(double16);\n" |
| 31477 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtn(double16);\n" |
| 31478 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtp(double16);\n" |
| 31479 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtz(double16);\n" |
| 31480 | "\n" |
| 31481 | "short __ovld __cnfn convert_short(double);\n" |
| 31482 | "short __ovld __cnfn convert_short_rte(double);\n" |
| 31483 | "short __ovld __cnfn convert_short_rtn(double);\n" |
| 31484 | "short __ovld __cnfn convert_short_rtp(double);\n" |
| 31485 | "short __ovld __cnfn convert_short_rtz(double);\n" |
| 31486 | "short __ovld __cnfn convert_short_sat(double);\n" |
| 31487 | "short __ovld __cnfn convert_short_sat_rte(double);\n" |
| 31488 | "short __ovld __cnfn convert_short_sat_rtn(double);\n" |
| 31489 | "short __ovld __cnfn convert_short_sat_rtp(double);\n" |
| 31490 | "short __ovld __cnfn convert_short_sat_rtz(double);\n" |
| 31491 | "short2 __ovld __cnfn convert_short2(double2);\n" |
| 31492 | "short2 __ovld __cnfn convert_short2_rte(double2);\n" |
| 31493 | "short2 __ovld __cnfn convert_short2_rtn(double2);\n" |
| 31494 | "short2 __ovld __cnfn convert_short2_rtp(double2);\n" |
| 31495 | "short2 __ovld __cnfn convert_short2_rtz(double2);\n" |
| 31496 | "short2 __ovld __cnfn convert_short2_sat(double2);\n" |
| 31497 | "short2 __ovld __cnfn convert_short2_sat_rte(double2);\n" |
| 31498 | "short2 __ovld __cnfn convert_short2_sat_rtn(double2);\n" |
| 31499 | "short2 __ovld __cnfn convert_short2_sat_rtp(double2);\n" |
| 31500 | "short2 __ovld __cnfn convert_short2_sat_rtz(double2);\n" |
| 31501 | "short3 __ovld __cnfn convert_short3(double3);\n" |
| 31502 | "short3 __ovld __cnfn convert_short3_rte(double3);\n" |
| 31503 | "short3 __ovld __cnfn convert_short3_rtn(double3);\n" |
| 31504 | "short3 __ovld __cnfn convert_short3_rtp(double3);\n" |
| 31505 | "short3 __ovld __cnfn convert_short3_rtz(double3);\n" |
| 31506 | "short3 __ovld __cnfn convert_short3_sat(double3);\n" |
| 31507 | "short3 __ovld __cnfn convert_short3_sat_rte(double3);\n" |
| 31508 | "short3 __ovld __cnfn convert_short3_sat_rtn(double3);\n" |
| 31509 | "short3 __ovld __cnfn convert_short3_sat_rtp(double3);\n" |
| 31510 | "short3 __ovld __cnfn convert_short3_sat_rtz(double3);\n" |
| 31511 | "short4 __ovld __cnfn convert_short4(double4);\n" |
| 31512 | "short4 __ovld __cnfn convert_short4_rte(double4);\n" |
| 31513 | "short4 __ovld __cnfn convert_short4_rtn(double4);\n" |
| 31514 | "short4 __ovld __cnfn convert_short4_rtp(double4);\n" |
| 31515 | "short4 __ovld __cnfn convert_short4_rtz(double4);\n" |
| 31516 | "short4 __ovld __cnfn convert_short4_sat(double4);\n" |
| 31517 | "short4 __ovld __cnfn convert_short4_sat_rte(double4);\n" |
| 31518 | "short4 __ovld __cnfn convert_short4_sat_rtn(double4);\n" |
| 31519 | "short4 __ovld __cnfn convert_short4_sat_rtp(double4);\n" |
| 31520 | "short4 __ovld __cnfn convert_short4_sat_rtz(double4);\n" |
| 31521 | "short8 __ovld __cnfn convert_short8(double8);\n" |
| 31522 | "short8 __ovld __cnfn convert_short8_rte(double8);\n" |
| 31523 | "short8 __ovld __cnfn convert_short8_rtn(double8);\n" |
| 31524 | "short8 __ovld __cnfn convert_short8_rtp(double8);\n" |
| 31525 | "short8 __ovld __cnfn convert_short8_rtz(double8);\n" |
| 31526 | "short8 __ovld __cnfn convert_short8_sat(double8);\n" |
| 31527 | "short8 __ovld __cnfn convert_short8_sat_rte(double8);\n" |
| 31528 | "short8 __ovld __cnfn convert_short8_sat_rtn(double8);\n" |
| 31529 | "short8 __ovld __cnfn convert_short8_sat_rtp(double8);\n" |
| 31530 | "short8 __ovld __cnfn convert_short8_sat_rtz(double8);\n" |
| 31531 | "short16 __ovld __cnfn convert_short16(double16);\n" |
| 31532 | "short16 __ovld __cnfn convert_short16_rte(double16);\n" |
| 31533 | "short16 __ovld __cnfn convert_short16_rtn(double16);\n" |
| 31534 | "short16 __ovld __cnfn convert_short16_rtp(double16);\n" |
| 31535 | "short16 __ovld __cnfn convert_short16_rtz(double16);\n" |
| 31536 | "short16 __ovld __cnfn convert_short16_sat(double16);\n" |
| 31537 | "short16 __ovld __cnfn convert_short16_sat_rte(double16);\n" |
| 31538 | "short16 __ovld __cnfn convert_short16_sat_rtn(double16);\n" |
| 31539 | "short16 __ovld __cnfn convert_short16_sat_rtp(double16);\n" |
| 31540 | "short16 __ovld __cnfn convert_short16_sat_rtz(double16);\n" |
| 31541 | "\n" |
| 31542 | "ushort __ovld __cnfn convert_ushort(double);\n" |
| 31543 | "ushort __ovld __cnfn convert_ushort_rte(double);\n" |
| 31544 | "ushort __ovld __cnfn convert_ushort_rtn(double);\n" |
| 31545 | "ushort __ovld __cnfn convert_ushort_rtp(double);\n" |
| 31546 | "ushort __ovld __cnfn convert_ushort_rtz(double);\n" |
| 31547 | "ushort __ovld __cnfn convert_ushort_sat(double);\n" |
| 31548 | "ushort __ovld __cnfn convert_ushort_sat_rte(double);\n" |
| 31549 | "ushort __ovld __cnfn convert_ushort_sat_rtn(double);\n" |
| 31550 | "ushort __ovld __cnfn convert_ushort_sat_rtp(double);\n" |
| 31551 | "ushort __ovld __cnfn convert_ushort_sat_rtz(double);\n" |
| 31552 | "ushort2 __ovld __cnfn convert_ushort2(double2);\n" |
| 31553 | "ushort2 __ovld __cnfn convert_ushort2_rte(double2);\n" |
| 31554 | "ushort2 __ovld __cnfn convert_ushort2_rtn(double2);\n" |
| 31555 | "ushort2 __ovld __cnfn convert_ushort2_rtp(double2);\n" |
| 31556 | "ushort2 __ovld __cnfn convert_ushort2_rtz(double2);\n" |
| 31557 | "ushort2 __ovld __cnfn convert_ushort2_sat(double2);\n" |
| 31558 | "ushort2 __ovld __cnfn convert_ushort2_sat_rte(double2);\n" |
| 31559 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtn(double2);\n" |
| 31560 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtp(double2);\n" |
| 31561 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtz(double2);\n" |
| 31562 | "ushort3 __ovld __cnfn convert_ushort3(double3);\n" |
| 31563 | "ushort3 __ovld __cnfn convert_ushort3_rte(double3);\n" |
| 31564 | "ushort3 __ovld __cnfn convert_ushort3_rtn(double3);\n" |
| 31565 | "ushort3 __ovld __cnfn convert_ushort3_rtp(double3);\n" |
| 31566 | "ushort3 __ovld __cnfn convert_ushort3_rtz(double3);\n" |
| 31567 | "ushort3 __ovld __cnfn convert_ushort3_sat(double3);\n" |
| 31568 | "ushort3 __ovld __cnfn convert_ushort3_sat_rte(double3);\n" |
| 31569 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtn(double3);\n" |
| 31570 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtp(double3);\n" |
| 31571 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtz(double3);\n" |
| 31572 | "ushort4 __ovld __cnfn convert_ushort4(double4);\n" |
| 31573 | "ushort4 __ovld __cnfn convert_ushort4_rte(double4);\n" |
| 31574 | "ushort4 __ovld __cnfn convert_ushort4_rtn(double4);\n" |
| 31575 | "ushort4 __ovld __cnfn convert_ushort4_rtp(double4);\n" |
| 31576 | "ushort4 __ovld __cnfn convert_ushort4_rtz(double4);\n" |
| 31577 | "ushort4 __ovld __cnfn convert_ushort4_sat(double4);\n" |
| 31578 | "ushort4 __ovld __cnfn convert_ushort4_sat_rte(double4);\n" |
| 31579 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtn(double4);\n" |
| 31580 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtp(double4);\n" |
| 31581 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtz(double4);\n" |
| 31582 | "ushort8 __ovld __cnfn convert_ushort8(double8);\n" |
| 31583 | "ushort8 __ovld __cnfn convert_ushort8_rte(double8);\n" |
| 31584 | "ushort8 __ovld __cnfn convert_ushort8_rtn(double8);\n" |
| 31585 | "ushort8 __ovld __cnfn convert_ushort8_rtp(double8);\n" |
| 31586 | "ushort8 __ovld __cnfn convert_ushort8_rtz(double8);\n" |
| 31587 | "ushort8 __ovld __cnfn convert_ushort8_sat(double8);\n" |
| 31588 | "ushort8 __ovld __cnfn convert_ushort8_sat_rte(double8);\n" |
| 31589 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtn(double8);\n" |
| 31590 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtp(double8);\n" |
| 31591 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtz(double8);\n" |
| 31592 | "ushort16 __ovld __cnfn convert_ushort16(double16);\n" |
| 31593 | "ushort16 __ovld __cnfn convert_ushort16_rte(double16);\n" |
| 31594 | "ushort16 __ovld __cnfn convert_ushort16_rtn(double16);\n" |
| 31595 | "ushort16 __ovld __cnfn convert_ushort16_rtp(double16);\n" |
| 31596 | "ushort16 __ovld __cnfn convert_ushort16_rtz(double16);\n" |
| 31597 | "ushort16 __ovld __cnfn convert_ushort16_sat(double16);\n" |
| 31598 | "ushort16 __ovld __cnfn convert_ushort16_sat_rte(double16);\n" |
| 31599 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtn(double16);\n" |
| 31600 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtp(double16);\n" |
| 31601 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtz(double16);\n" |
| 31602 | "\n" |
| 31603 | "int __ovld __cnfn convert_int(double);\n" |
| 31604 | "int __ovld __cnfn convert_int_rte(double);\n" |
| 31605 | "int __ovld __cnfn convert_int_rtn(double);\n" |
| 31606 | "int __ovld __cnfn convert_int_rtp(double);\n" |
| 31607 | "int __ovld __cnfn convert_int_rtz(double);\n" |
| 31608 | "int __ovld __cnfn convert_int_sat(double);\n" |
| 31609 | "int __ovld __cnfn convert_int_sat_rte(double);\n" |
| 31610 | "int __ovld __cnfn convert_int_sat_rtn(double);\n" |
| 31611 | "int __ovld __cnfn convert_int_sat_rtp(double);\n" |
| 31612 | "int __ovld __cnfn convert_int_sat_rtz(double);\n" |
| 31613 | "int2 __ovld __cnfn convert_int2(double2);\n" |
| 31614 | "int2 __ovld __cnfn convert_int2_rte(double2);\n" |
| 31615 | "int2 __ovld __cnfn convert_int2_rtn(double2);\n" |
| 31616 | "int2 __ovld __cnfn convert_int2_rtp(double2);\n" |
| 31617 | "int2 __ovld __cnfn convert_int2_rtz(double2);\n" |
| 31618 | "int2 __ovld __cnfn convert_int2_sat(double2);\n" |
| 31619 | "int2 __ovld __cnfn convert_int2_sat_rte(double2);\n" |
| 31620 | "int2 __ovld __cnfn convert_int2_sat_rtn(double2);\n" |
| 31621 | "int2 __ovld __cnfn convert_int2_sat_rtp(double2);\n" |
| 31622 | "int2 __ovld __cnfn convert_int2_sat_rtz(double2);\n" |
| 31623 | "int3 __ovld __cnfn convert_int3(double3);\n" |
| 31624 | "int3 __ovld __cnfn convert_int3_rte(double3);\n" |
| 31625 | "int3 __ovld __cnfn convert_int3_rtn(double3);\n" |
| 31626 | "int3 __ovld __cnfn convert_int3_rtp(double3);\n" |
| 31627 | "int3 __ovld __cnfn convert_int3_rtz(double3);\n" |
| 31628 | "int3 __ovld __cnfn convert_int3_sat(double3);\n" |
| 31629 | "int3 __ovld __cnfn convert_int3_sat_rte(double3);\n" |
| 31630 | "int3 __ovld __cnfn convert_int3_sat_rtn(double3);\n" |
| 31631 | "int3 __ovld __cnfn convert_int3_sat_rtp(double3);\n" |
| 31632 | "int3 __ovld __cnfn convert_int3_sat_rtz(double3);\n" |
| 31633 | "int4 __ovld __cnfn convert_int4(double4);\n" |
| 31634 | "int4 __ovld __cnfn convert_int4_rte(double4);\n" |
| 31635 | "int4 __ovld __cnfn convert_int4_rtn(double4);\n" |
| 31636 | "int4 __ovld __cnfn convert_int4_rtp(double4);\n" |
| 31637 | "int4 __ovld __cnfn convert_int4_rtz(double4);\n" |
| 31638 | "int4 __ovld __cnfn convert_int4_sat(double4);\n" |
| 31639 | "int4 __ovld __cnfn convert_int4_sat_rte(double4);\n" |
| 31640 | "int4 __ovld __cnfn convert_int4_sat_rtn(double4);\n" |
| 31641 | "int4 __ovld __cnfn convert_int4_sat_rtp(double4);\n" |
| 31642 | "int4 __ovld __cnfn convert_int4_sat_rtz(double4);\n" |
| 31643 | "int8 __ovld __cnfn convert_int8(double8);\n" |
| 31644 | "int8 __ovld __cnfn convert_int8_rte(double8);\n" |
| 31645 | "int8 __ovld __cnfn convert_int8_rtn(double8);\n" |
| 31646 | "int8 __ovld __cnfn convert_int8_rtp(double8);\n" |
| 31647 | "int8 __ovld __cnfn convert_int8_rtz(double8);\n" |
| 31648 | "int8 __ovld __cnfn convert_int8_sat(double8);\n" |
| 31649 | "int8 __ovld __cnfn convert_int8_sat_rte(double8);\n" |
| 31650 | "int8 __ovld __cnfn convert_int8_sat_rtn(double8);\n" |
| 31651 | "int8 __ovld __cnfn convert_int8_sat_rtp(double8);\n" |
| 31652 | "int8 __ovld __cnfn convert_int8_sat_rtz(double8);\n" |
| 31653 | "int16 __ovld __cnfn convert_int16(double16);\n" |
| 31654 | "int16 __ovld __cnfn convert_int16_rte(double16);\n" |
| 31655 | "int16 __ovld __cnfn convert_int16_rtn(double16);\n" |
| 31656 | "int16 __ovld __cnfn convert_int16_rtp(double16);\n" |
| 31657 | "int16 __ovld __cnfn convert_int16_rtz(double16);\n" |
| 31658 | "int16 __ovld __cnfn convert_int16_sat(double16);\n" |
| 31659 | "int16 __ovld __cnfn convert_int16_sat_rte(double16);\n" |
| 31660 | "int16 __ovld __cnfn convert_int16_sat_rtn(double16);\n" |
| 31661 | "int16 __ovld __cnfn convert_int16_sat_rtp(double16);\n" |
| 31662 | "int16 __ovld __cnfn convert_int16_sat_rtz(double16);\n" |
| 31663 | "\n" |
| 31664 | "uint __ovld __cnfn convert_uint(double);\n" |
| 31665 | "uint __ovld __cnfn convert_uint_rte(double);\n" |
| 31666 | "uint __ovld __cnfn convert_uint_rtn(double);\n" |
| 31667 | "uint __ovld __cnfn convert_uint_rtp(double);\n" |
| 31668 | "uint __ovld __cnfn convert_uint_rtz(double);\n" |
| 31669 | "uint __ovld __cnfn convert_uint_sat(double);\n" |
| 31670 | "uint __ovld __cnfn convert_uint_sat_rte(double);\n" |
| 31671 | "uint __ovld __cnfn convert_uint_sat_rtn(double);\n" |
| 31672 | "uint __ovld __cnfn convert_uint_sat_rtp(double);\n" |
| 31673 | "uint __ovld __cnfn convert_uint_sat_rtz(double);\n" |
| 31674 | "uint2 __ovld __cnfn convert_uint2(double2);\n" |
| 31675 | "uint2 __ovld __cnfn convert_uint2_rte(double2);\n" |
| 31676 | "uint2 __ovld __cnfn convert_uint2_rtn(double2);\n" |
| 31677 | "uint2 __ovld __cnfn convert_uint2_rtp(double2);\n" |
| 31678 | "uint2 __ovld __cnfn convert_uint2_rtz(double2);\n" |
| 31679 | "uint2 __ovld __cnfn convert_uint2_sat(double2);\n" |
| 31680 | "uint2 __ovld __cnfn convert_uint2_sat_rte(double2);\n" |
| 31681 | "uint2 __ovld __cnfn convert_uint2_sat_rtn(double2);\n" |
| 31682 | "uint2 __ovld __cnfn convert_uint2_sat_rtp(double2);\n" |
| 31683 | "uint2 __ovld __cnfn convert_uint2_sat_rtz(double2);\n" |
| 31684 | "uint3 __ovld __cnfn convert_uint3(double3);\n" |
| 31685 | "uint3 __ovld __cnfn convert_uint3_rte(double3);\n" |
| 31686 | "uint3 __ovld __cnfn convert_uint3_rtn(double3);\n" |
| 31687 | "uint3 __ovld __cnfn convert_uint3_rtp(double3);\n" |
| 31688 | "uint3 __ovld __cnfn convert_uint3_rtz(double3);\n" |
| 31689 | "uint3 __ovld __cnfn convert_uint3_sat(double3);\n" |
| 31690 | "uint3 __ovld __cnfn convert_uint3_sat_rte(double3);\n" |
| 31691 | "uint3 __ovld __cnfn convert_uint3_sat_rtn(double3);\n" |
| 31692 | "uint3 __ovld __cnfn convert_uint3_sat_rtp(double3);\n" |
| 31693 | "uint3 __ovld __cnfn convert_uint3_sat_rtz(double3);\n" |
| 31694 | "uint4 __ovld __cnfn convert_uint4(double4);\n" |
| 31695 | "uint4 __ovld __cnfn convert_uint4_rte(double4);\n" |
| 31696 | "uint4 __ovld __cnfn convert_uint4_rtn(double4);\n" |
| 31697 | "uint4 __ovld __cnfn convert_uint4_rtp(double4);\n" |
| 31698 | "uint4 __ovld __cnfn convert_uint4_rtz(double4);\n" |
| 31699 | "uint4 __ovld __cnfn convert_uint4_sat(double4);\n" |
| 31700 | "uint4 __ovld __cnfn convert_uint4_sat_rte(double4);\n" |
| 31701 | "uint4 __ovld __cnfn convert_uint4_sat_rtn(double4);\n" |
| 31702 | "uint4 __ovld __cnfn convert_uint4_sat_rtp(double4);\n" |
| 31703 | "uint4 __ovld __cnfn convert_uint4_sat_rtz(double4);\n" |
| 31704 | "uint8 __ovld __cnfn convert_uint8(double8);\n" |
| 31705 | "uint8 __ovld __cnfn convert_uint8_rte(double8);\n" |
| 31706 | "uint8 __ovld __cnfn convert_uint8_rtn(double8);\n" |
| 31707 | "uint8 __ovld __cnfn convert_uint8_rtp(double8);\n" |
| 31708 | "uint8 __ovld __cnfn convert_uint8_rtz(double8);\n" |
| 31709 | "uint8 __ovld __cnfn convert_uint8_sat(double8);\n" |
| 31710 | "uint8 __ovld __cnfn convert_uint8_sat_rte(double8);\n" |
| 31711 | "uint8 __ovld __cnfn convert_uint8_sat_rtn(double8);\n" |
| 31712 | "uint8 __ovld __cnfn convert_uint8_sat_rtp(double8);\n" |
| 31713 | "uint8 __ovld __cnfn convert_uint8_sat_rtz(double8);\n" |
| 31714 | "uint16 __ovld __cnfn convert_uint16(double16);\n" |
| 31715 | "uint16 __ovld __cnfn convert_uint16_rte(double16);\n" |
| 31716 | "uint16 __ovld __cnfn convert_uint16_rtn(double16);\n" |
| 31717 | "uint16 __ovld __cnfn convert_uint16_rtp(double16);\n" |
| 31718 | "uint16 __ovld __cnfn convert_uint16_rtz(double16);\n" |
| 31719 | "uint16 __ovld __cnfn convert_uint16_sat(double16);\n" |
| 31720 | "uint16 __ovld __cnfn convert_uint16_sat_rte(double16);\n" |
| 31721 | "uint16 __ovld __cnfn convert_uint16_sat_rtn(double16);\n" |
| 31722 | "uint16 __ovld __cnfn convert_uint16_sat_rtp(double16);\n" |
| 31723 | "uint16 __ovld __cnfn convert_uint16_sat_rtz(double16);\n" |
| 31724 | "\n" |
| 31725 | "long __ovld __cnfn convert_long(double);\n" |
| 31726 | "long __ovld __cnfn convert_long_rte(double);\n" |
| 31727 | "long __ovld __cnfn convert_long_rtn(double);\n" |
| 31728 | "long __ovld __cnfn convert_long_rtp(double);\n" |
| 31729 | "long __ovld __cnfn convert_long_rtz(double);\n" |
| 31730 | "long __ovld __cnfn convert_long_sat(double);\n" |
| 31731 | "long __ovld __cnfn convert_long_sat_rte(double);\n" |
| 31732 | "long __ovld __cnfn convert_long_sat_rtn(double);\n" |
| 31733 | "long __ovld __cnfn convert_long_sat_rtp(double);\n" |
| 31734 | "long __ovld __cnfn convert_long_sat_rtz(double);\n" |
| 31735 | "long2 __ovld __cnfn convert_long2(double2);\n" |
| 31736 | "long2 __ovld __cnfn convert_long2_rte(double2);\n" |
| 31737 | "long2 __ovld __cnfn convert_long2_rtn(double2);\n" |
| 31738 | "long2 __ovld __cnfn convert_long2_rtp(double2);\n" |
| 31739 | "long2 __ovld __cnfn convert_long2_rtz(double2);\n" |
| 31740 | "long2 __ovld __cnfn convert_long2_sat(double2);\n" |
| 31741 | "long2 __ovld __cnfn convert_long2_sat_rte(double2);\n" |
| 31742 | "long2 __ovld __cnfn convert_long2_sat_rtn(double2);\n" |
| 31743 | "long2 __ovld __cnfn convert_long2_sat_rtp(double2);\n" |
| 31744 | "long2 __ovld __cnfn convert_long2_sat_rtz(double2);\n" |
| 31745 | "long3 __ovld __cnfn convert_long3(double3);\n" |
| 31746 | "long3 __ovld __cnfn convert_long3_rte(double3);\n" |
| 31747 | "long3 __ovld __cnfn convert_long3_rtn(double3);\n" |
| 31748 | "long3 __ovld __cnfn convert_long3_rtp(double3);\n" |
| 31749 | "long3 __ovld __cnfn convert_long3_rtz(double3);\n" |
| 31750 | "long3 __ovld __cnfn convert_long3_sat(double3);\n" |
| 31751 | "long3 __ovld __cnfn convert_long3_sat_rte(double3);\n" |
| 31752 | "long3 __ovld __cnfn convert_long3_sat_rtn(double3);\n" |
| 31753 | "long3 __ovld __cnfn convert_long3_sat_rtp(double3);\n" |
| 31754 | "long3 __ovld __cnfn convert_long3_sat_rtz(double3);\n" |
| 31755 | "long4 __ovld __cnfn convert_long4(double4);\n" |
| 31756 | "long4 __ovld __cnfn convert_long4_rte(double4);\n" |
| 31757 | "long4 __ovld __cnfn convert_long4_rtn(double4);\n" |
| 31758 | "long4 __ovld __cnfn convert_long4_rtp(double4);\n" |
| 31759 | "long4 __ovld __cnfn convert_long4_rtz(double4);\n" |
| 31760 | "long4 __ovld __cnfn convert_long4_sat(double4);\n" |
| 31761 | "long4 __ovld __cnfn convert_long4_sat_rte(double4);\n" |
| 31762 | "long4 __ovld __cnfn convert_long4_sat_rtn(double4);\n" |
| 31763 | "long4 __ovld __cnfn convert_long4_sat_rtp(double4);\n" |
| 31764 | "long4 __ovld __cnfn convert_long4_sat_rtz(double4);\n" |
| 31765 | "long8 __ovld __cnfn convert_long8(double8);\n" |
| 31766 | "long8 __ovld __cnfn convert_long8_rte(double8);\n" |
| 31767 | "long8 __ovld __cnfn convert_long8_rtn(double8);\n" |
| 31768 | "long8 __ovld __cnfn convert_long8_rtp(double8);\n" |
| 31769 | "long8 __ovld __cnfn convert_long8_rtz(double8);\n" |
| 31770 | "long8 __ovld __cnfn convert_long8_sat(double8);\n" |
| 31771 | "long8 __ovld __cnfn convert_long8_sat_rte(double8);\n" |
| 31772 | "long8 __ovld __cnfn convert_long8_sat_rtn(double8);\n" |
| 31773 | "long8 __ovld __cnfn convert_long8_sat_rtp(double8);\n" |
| 31774 | "long8 __ovld __cnfn convert_long8_sat_rtz(double8);\n" |
| 31775 | "long16 __ovld __cnfn convert_long16(double16);\n" |
| 31776 | "long16 __ovld __cnfn convert_long16_rte(double16);\n" |
| 31777 | "long16 __ovld __cnfn convert_long16_rtn(double16);\n" |
| 31778 | "long16 __ovld __cnfn convert_long16_rtp(double16);\n" |
| 31779 | "long16 __ovld __cnfn convert_long16_rtz(double16);\n" |
| 31780 | "long16 __ovld __cnfn convert_long16_sat(double16);\n" |
| 31781 | "long16 __ovld __cnfn convert_long16_sat_rte(double16);\n" |
| 31782 | "long16 __ovld __cnfn convert_long16_sat_rtn(double16);\n" |
| 31783 | "long16 __ovld __cnfn convert_long16_sat_rtp(double16);\n" |
| 31784 | "long16 __ovld __cnfn convert_long16_sat_rtz(double16);\n" |
| 31785 | "\n" |
| 31786 | "ulong __ovld __cnfn convert_ulong(double);\n" |
| 31787 | "ulong __ovld __cnfn convert_ulong_rte(double);\n" |
| 31788 | "ulong __ovld __cnfn convert_ulong_rtn(double);\n" |
| 31789 | "ulong __ovld __cnfn convert_ulong_rtp(double);\n" |
| 31790 | "ulong __ovld __cnfn convert_ulong_rtz(double);\n" |
| 31791 | "ulong __ovld __cnfn convert_ulong_sat(double);\n" |
| 31792 | "ulong __ovld __cnfn convert_ulong_sat_rte(double);\n" |
| 31793 | "ulong __ovld __cnfn convert_ulong_sat_rtn(double);\n" |
| 31794 | "ulong __ovld __cnfn convert_ulong_sat_rtp(double);\n" |
| 31795 | "ulong __ovld __cnfn convert_ulong_sat_rtz(double);\n" |
| 31796 | "ulong2 __ovld __cnfn convert_ulong2(double2);\n" |
| 31797 | "ulong2 __ovld __cnfn convert_ulong2_rte(double2);\n" |
| 31798 | "ulong2 __ovld __cnfn convert_ulong2_rtn(double2);\n" |
| 31799 | "ulong2 __ovld __cnfn convert_ulong2_rtp(double2);\n" |
| 31800 | "ulong2 __ovld __cnfn convert_ulong2_rtz(double2);\n" |
| 31801 | "ulong2 __ovld __cnfn convert_ulong2_sat(double2);\n" |
| 31802 | "ulong2 __ovld __cnfn convert_ulong2_sat_rte(double2);\n" |
| 31803 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtn(double2);\n" |
| 31804 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtp(double2);\n" |
| 31805 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtz(double2);\n" |
| 31806 | "ulong3 __ovld __cnfn convert_ulong3(double3);\n" |
| 31807 | "ulong3 __ovld __cnfn convert_ulong3_rte(double3);\n" |
| 31808 | "ulong3 __ovld __cnfn convert_ulong3_rtn(double3);\n" |
| 31809 | "ulong3 __ovld __cnfn convert_ulong3_rtp(double3);\n" |
| 31810 | "ulong3 __ovld __cnfn convert_ulong3_rtz(double3);\n" |
| 31811 | "ulong3 __ovld __cnfn convert_ulong3_sat(double3);\n" |
| 31812 | "ulong3 __ovld __cnfn convert_ulong3_sat_rte(double3);\n" |
| 31813 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtn(double3);\n" |
| 31814 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtp(double3);\n" |
| 31815 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtz(double3);\n" |
| 31816 | "ulong4 __ovld __cnfn convert_ulong4(double4);\n" |
| 31817 | "ulong4 __ovld __cnfn convert_ulong4_rte(double4);\n" |
| 31818 | "ulong4 __ovld __cnfn convert_ulong4_rtn(double4);\n" |
| 31819 | "ulong4 __ovld __cnfn convert_ulong4_rtp(double4);\n" |
| 31820 | "ulong4 __ovld __cnfn convert_ulong4_rtz(double4);\n" |
| 31821 | "ulong4 __ovld __cnfn convert_ulong4_sat(double4);\n" |
| 31822 | "ulong4 __ovld __cnfn convert_ulong4_sat_rte(double4);\n" |
| 31823 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtn(double4);\n" |
| 31824 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtp(double4);\n" |
| 31825 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtz(double4);\n" |
| 31826 | "ulong8 __ovld __cnfn convert_ulong8(double8);\n" |
| 31827 | "ulong8 __ovld __cnfn convert_ulong8_rte(double8);\n" |
| 31828 | "ulong8 __ovld __cnfn convert_ulong8_rtn(double8);\n" |
| 31829 | "ulong8 __ovld __cnfn convert_ulong8_rtp(double8);\n" |
| 31830 | "ulong8 __ovld __cnfn convert_ulong8_rtz(double8);\n" |
| 31831 | "ulong8 __ovld __cnfn convert_ulong8_sat(double8);\n" |
| 31832 | "ulong8 __ovld __cnfn convert_ulong8_sat_rte(double8);\n" |
| 31833 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtn(double8);\n" |
| 31834 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtp(double8);\n" |
| 31835 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtz(double8);\n" |
| 31836 | "ulong16 __ovld __cnfn convert_ulong16(double16);\n" |
| 31837 | "ulong16 __ovld __cnfn convert_ulong16_rte(double16);\n" |
| 31838 | "ulong16 __ovld __cnfn convert_ulong16_rtn(double16);\n" |
| 31839 | "ulong16 __ovld __cnfn convert_ulong16_rtp(double16);\n" |
| 31840 | "ulong16 __ovld __cnfn convert_ulong16_rtz(double16);\n" |
| 31841 | "ulong16 __ovld __cnfn convert_ulong16_sat(double16);\n" |
| 31842 | "ulong16 __ovld __cnfn convert_ulong16_sat_rte(double16);\n" |
| 31843 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtn(double16);\n" |
| 31844 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtp(double16);\n" |
| 31845 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtz(double16);\n" |
| 31846 | "\n" |
| 31847 | "float __ovld __cnfn convert_float(double);\n" |
| 31848 | "float __ovld __cnfn convert_float_rte(double);\n" |
| 31849 | "float __ovld __cnfn convert_float_rtn(double);\n" |
| 31850 | "float __ovld __cnfn convert_float_rtp(double);\n" |
| 31851 | "float __ovld __cnfn convert_float_rtz(double);\n" |
| 31852 | "float2 __ovld __cnfn convert_float2(double2);\n" |
| 31853 | "float2 __ovld __cnfn convert_float2_rte(double2);\n" |
| 31854 | "float2 __ovld __cnfn convert_float2_rtn(double2);\n" |
| 31855 | "float2 __ovld __cnfn convert_float2_rtp(double2);\n" |
| 31856 | "float2 __ovld __cnfn convert_float2_rtz(double2);\n" |
| 31857 | "float3 __ovld __cnfn convert_float3(double3);\n" |
| 31858 | "float3 __ovld __cnfn convert_float3_rte(double3);\n" |
| 31859 | "float3 __ovld __cnfn convert_float3_rtn(double3);\n" |
| 31860 | "float3 __ovld __cnfn convert_float3_rtp(double3);\n" |
| 31861 | "float3 __ovld __cnfn convert_float3_rtz(double3);\n" |
| 31862 | "float4 __ovld __cnfn convert_float4(double4);\n" |
| 31863 | "float4 __ovld __cnfn convert_float4_rte(double4);\n" |
| 31864 | "float4 __ovld __cnfn convert_float4_rtn(double4);\n" |
| 31865 | "float4 __ovld __cnfn convert_float4_rtp(double4);\n" |
| 31866 | "float4 __ovld __cnfn convert_float4_rtz(double4);\n" |
| 31867 | "float8 __ovld __cnfn convert_float8(double8);\n" |
| 31868 | "float8 __ovld __cnfn convert_float8_rte(double8);\n" |
| 31869 | "float8 __ovld __cnfn convert_float8_rtn(double8);\n" |
| 31870 | "float8 __ovld __cnfn convert_float8_rtp(double8);\n" |
| 31871 | "float8 __ovld __cnfn convert_float8_rtz(double8);\n" |
| 31872 | "float16 __ovld __cnfn convert_float16(double16);\n" |
| 31873 | "float16 __ovld __cnfn convert_float16_rte(double16);\n" |
| 31874 | "float16 __ovld __cnfn convert_float16_rtn(double16);\n" |
| 31875 | "float16 __ovld __cnfn convert_float16_rtp(double16);\n" |
| 31876 | "float16 __ovld __cnfn convert_float16_rtz(double16);\n" |
| 31877 | "\n" |
| 31878 | "double __ovld __cnfn convert_double(char);\n" |
| 31879 | "double __ovld __cnfn convert_double(double);\n" |
| 31880 | "double __ovld __cnfn convert_double(float);\n" |
| 31881 | "double __ovld __cnfn convert_double(int);\n" |
| 31882 | "double __ovld __cnfn convert_double(long);\n" |
| 31883 | "double __ovld __cnfn convert_double(short);\n" |
| 31884 | "double __ovld __cnfn convert_double(uchar);\n" |
| 31885 | "double __ovld __cnfn convert_double(uint);\n" |
| 31886 | "double __ovld __cnfn convert_double(ulong);\n" |
| 31887 | "double __ovld __cnfn convert_double(ushort);\n" |
| 31888 | "double __ovld __cnfn convert_double_rte(char);\n" |
| 31889 | "double __ovld __cnfn convert_double_rte(double);\n" |
| 31890 | "double __ovld __cnfn convert_double_rte(float);\n" |
| 31891 | "double __ovld __cnfn convert_double_rte(int);\n" |
| 31892 | "double __ovld __cnfn convert_double_rte(long);\n" |
| 31893 | "double __ovld __cnfn convert_double_rte(short);\n" |
| 31894 | "double __ovld __cnfn convert_double_rte(uchar);\n" |
| 31895 | "double __ovld __cnfn convert_double_rte(uint);\n" |
| 31896 | "double __ovld __cnfn convert_double_rte(ulong);\n" |
| 31897 | "double __ovld __cnfn convert_double_rte(ushort);\n" |
| 31898 | "double __ovld __cnfn convert_double_rtn(char);\n" |
| 31899 | "double __ovld __cnfn convert_double_rtn(double);\n" |
| 31900 | "double __ovld __cnfn convert_double_rtn(float);\n" |
| 31901 | "double __ovld __cnfn convert_double_rtn(int);\n" |
| 31902 | "double __ovld __cnfn convert_double_rtn(long);\n" |
| 31903 | "double __ovld __cnfn convert_double_rtn(short);\n" |
| 31904 | "double __ovld __cnfn convert_double_rtn(uchar);\n" |
| 31905 | "double __ovld __cnfn convert_double_rtn(uint);\n" |
| 31906 | "double __ovld __cnfn convert_double_rtn(ulong);\n" |
| 31907 | "double __ovld __cnfn convert_double_rtn(ushort);\n" |
| 31908 | "double __ovld __cnfn convert_double_rtp(char);\n" |
| 31909 | "double __ovld __cnfn convert_double_rtp(double);\n" |
| 31910 | "double __ovld __cnfn convert_double_rtp(float);\n" |
| 31911 | "double __ovld __cnfn convert_double_rtp(int);\n" |
| 31912 | "double __ovld __cnfn convert_double_rtp(long);\n" |
| 31913 | "double __ovld __cnfn convert_double_rtp(short);\n" |
| 31914 | "double __ovld __cnfn convert_double_rtp(uchar);\n" |
| 31915 | "double __ovld __cnfn convert_double_rtp(uint);\n" |
| 31916 | "double __ovld __cnfn convert_double_rtp(ulong);\n" |
| 31917 | "double __ovld __cnfn convert_double_rtp(ushort);\n" |
| 31918 | "double __ovld __cnfn convert_double_rtz(char);\n" |
| 31919 | "double __ovld __cnfn convert_double_rtz(double);\n" |
| 31920 | "double __ovld __cnfn convert_double_rtz(float);\n" |
| 31921 | "double __ovld __cnfn convert_double_rtz(int);\n" |
| 31922 | "double __ovld __cnfn convert_double_rtz(long);\n" |
| 31923 | "double __ovld __cnfn convert_double_rtz(short);\n" |
| 31924 | "double __ovld __cnfn convert_double_rtz(uchar);\n" |
| 31925 | "double __ovld __cnfn convert_double_rtz(uint);\n" |
| 31926 | "double __ovld __cnfn convert_double_rtz(ulong);\n" |
| 31927 | "double __ovld __cnfn convert_double_rtz(ushort);\n" |
| 31928 | "double2 __ovld __cnfn convert_double2(char2);\n" |
| 31929 | "double2 __ovld __cnfn convert_double2(double2);\n" |
| 31930 | "double2 __ovld __cnfn convert_double2(float2);\n" |
| 31931 | "double2 __ovld __cnfn convert_double2(int2);\n" |
| 31932 | "double2 __ovld __cnfn convert_double2(long2);\n" |
| 31933 | "double2 __ovld __cnfn convert_double2(short2);\n" |
| 31934 | "double2 __ovld __cnfn convert_double2(uchar2);\n" |
| 31935 | "double2 __ovld __cnfn convert_double2(uint2);\n" |
| 31936 | "double2 __ovld __cnfn convert_double2(ulong2);\n" |
| 31937 | "double2 __ovld __cnfn convert_double2(ushort2);\n" |
| 31938 | "double2 __ovld __cnfn convert_double2_rte(char2);\n" |
| 31939 | "double2 __ovld __cnfn convert_double2_rte(double2);\n" |
| 31940 | "double2 __ovld __cnfn convert_double2_rte(float2);\n" |
| 31941 | "double2 __ovld __cnfn convert_double2_rte(int2);\n" |
| 31942 | "double2 __ovld __cnfn convert_double2_rte(long2);\n" |
| 31943 | "double2 __ovld __cnfn convert_double2_rte(short2);\n" |
| 31944 | "double2 __ovld __cnfn convert_double2_rte(uchar2);\n" |
| 31945 | "double2 __ovld __cnfn convert_double2_rte(uint2);\n" |
| 31946 | "double2 __ovld __cnfn convert_double2_rte(ulong2);\n" |
| 31947 | "double2 __ovld __cnfn convert_double2_rte(ushort2);\n" |
| 31948 | "double2 __ovld __cnfn convert_double2_rtn(char2);\n" |
| 31949 | "double2 __ovld __cnfn convert_double2_rtn(double2);\n" |
| 31950 | "double2 __ovld __cnfn convert_double2_rtn(float2);\n" |
| 31951 | "double2 __ovld __cnfn convert_double2_rtn(int2);\n" |
| 31952 | "double2 __ovld __cnfn convert_double2_rtn(long2);\n" |
| 31953 | "double2 __ovld __cnfn convert_double2_rtn(short2);\n" |
| 31954 | "double2 __ovld __cnfn convert_double2_rtn(uchar2);\n" |
| 31955 | "double2 __ovld __cnfn convert_double2_rtn(uint2);\n" |
| 31956 | "double2 __ovld __cnfn convert_double2_rtn(ulong2);\n" |
| 31957 | "double2 __ovld __cnfn convert_double2_rtn(ushort2);\n" |
| 31958 | "double2 __ovld __cnfn convert_double2_rtp(char2);\n" |
| 31959 | "double2 __ovld __cnfn convert_double2_rtp(double2);\n" |
| 31960 | "double2 __ovld __cnfn convert_double2_rtp(float2);\n" |
| 31961 | "double2 __ovld __cnfn convert_double2_rtp(int2);\n" |
| 31962 | "double2 __ovld __cnfn convert_double2_rtp(long2);\n" |
| 31963 | "double2 __ovld __cnfn convert_double2_rtp(short2);\n" |
| 31964 | "double2 __ovld __cnfn convert_double2_rtp(uchar2);\n" |
| 31965 | "double2 __ovld __cnfn convert_double2_rtp(uint2);\n" |
| 31966 | "double2 __ovld __cnfn convert_double2_rtp(ulong2);\n" |
| 31967 | "double2 __ovld __cnfn convert_double2_rtp(ushort2);\n" |
| 31968 | "double2 __ovld __cnfn convert_double2_rtz(char2);\n" |
| 31969 | "double2 __ovld __cnfn convert_double2_rtz(double2);\n" |
| 31970 | "double2 __ovld __cnfn convert_double2_rtz(float2);\n" |
| 31971 | "double2 __ovld __cnfn convert_double2_rtz(int2);\n" |
| 31972 | "double2 __ovld __cnfn convert_double2_rtz(long2);\n" |
| 31973 | "double2 __ovld __cnfn convert_double2_rtz(short2);\n" |
| 31974 | "double2 __ovld __cnfn convert_double2_rtz(uchar2);\n" |
| 31975 | "double2 __ovld __cnfn convert_double2_rtz(uint2);\n" |
| 31976 | "double2 __ovld __cnfn convert_double2_rtz(ulong2);\n" |
| 31977 | "double2 __ovld __cnfn convert_double2_rtz(ushort2);\n" |
| 31978 | "double3 __ovld __cnfn convert_double3(char3);\n" |
| 31979 | "double3 __ovld __cnfn convert_double3(double3);\n" |
| 31980 | "double3 __ovld __cnfn convert_double3(float3);\n" |
| 31981 | "double3 __ovld __cnfn convert_double3(int3);\n" |
| 31982 | "double3 __ovld __cnfn convert_double3(long3);\n" |
| 31983 | "double3 __ovld __cnfn convert_double3(short3);\n" |
| 31984 | "double3 __ovld __cnfn convert_double3(uchar3);\n" |
| 31985 | "double3 __ovld __cnfn convert_double3(uint3);\n" |
| 31986 | "double3 __ovld __cnfn convert_double3(ulong3);\n" |
| 31987 | "double3 __ovld __cnfn convert_double3(ushort3);\n" |
| 31988 | "double3 __ovld __cnfn convert_double3_rte(char3);\n" |
| 31989 | "double3 __ovld __cnfn convert_double3_rte(double3);\n" |
| 31990 | "double3 __ovld __cnfn convert_double3_rte(float3);\n" |
| 31991 | "double3 __ovld __cnfn convert_double3_rte(int3);\n" |
| 31992 | "double3 __ovld __cnfn convert_double3_rte(long3);\n" |
| 31993 | "double3 __ovld __cnfn convert_double3_rte(short3);\n" |
| 31994 | "double3 __ovld __cnfn convert_double3_rte(uchar3);\n" |
| 31995 | "double3 __ovld __cnfn convert_double3_rte(uint3);\n" |
| 31996 | "double3 __ovld __cnfn convert_double3_rte(ulong3);\n" |
| 31997 | "double3 __ovld __cnfn convert_double3_rte(ushort3);\n" |
| 31998 | "double3 __ovld __cnfn convert_double3_rtn(char3);\n" |
| 31999 | "double3 __ovld __cnfn convert_double3_rtn(double3);\n" |
| 32000 | "double3 __ovld __cnfn convert_double3_rtn(float3);\n" |
| 32001 | "double3 __ovld __cnfn convert_double3_rtn(int3);\n" |
| 32002 | "double3 __ovld __cnfn convert_double3_rtn(long3);\n" |
| 32003 | "double3 __ovld __cnfn convert_double3_rtn(short3);\n" |
| 32004 | "double3 __ovld __cnfn convert_double3_rtn(uchar3);\n" |
| 32005 | "double3 __ovld __cnfn convert_double3_rtn(uint3);\n" |
| 32006 | "double3 __ovld __cnfn convert_double3_rtn(ulong3);\n" |
| 32007 | "double3 __ovld __cnfn convert_double3_rtn(ushort3);\n" |
| 32008 | "double3 __ovld __cnfn convert_double3_rtp(char3);\n" |
| 32009 | "double3 __ovld __cnfn convert_double3_rtp(double3);\n" |
| 32010 | "double3 __ovld __cnfn convert_double3_rtp(float3);\n" |
| 32011 | "double3 __ovld __cnfn convert_double3_rtp(int3);\n" |
| 32012 | "double3 __ovld __cnfn convert_double3_rtp(long3);\n" |
| 32013 | "double3 __ovld __cnfn convert_double3_rtp(short3);\n" |
| 32014 | "double3 __ovld __cnfn convert_double3_rtp(uchar3);\n" |
| 32015 | "double3 __ovld __cnfn convert_double3_rtp(uint3);\n" |
| 32016 | "double3 __ovld __cnfn convert_double3_rtp(ulong3);\n" |
| 32017 | "double3 __ovld __cnfn convert_double3_rtp(ushort3);\n" |
| 32018 | "double3 __ovld __cnfn convert_double3_rtz(char3);\n" |
| 32019 | "double3 __ovld __cnfn convert_double3_rtz(double3);\n" |
| 32020 | "double3 __ovld __cnfn convert_double3_rtz(float3);\n" |
| 32021 | "double3 __ovld __cnfn convert_double3_rtz(int3);\n" |
| 32022 | "double3 __ovld __cnfn convert_double3_rtz(long3);\n" |
| 32023 | "double3 __ovld __cnfn convert_double3_rtz(short3);\n" |
| 32024 | "double3 __ovld __cnfn convert_double3_rtz(uchar3);\n" |
| 32025 | "double3 __ovld __cnfn convert_double3_rtz(uint3);\n" |
| 32026 | "double3 __ovld __cnfn convert_double3_rtz(ulong3);\n" |
| 32027 | "double3 __ovld __cnfn convert_double3_rtz(ushort3);\n" |
| 32028 | "double4 __ovld __cnfn convert_double4(char4);\n" |
| 32029 | "double4 __ovld __cnfn convert_double4(double4);\n" |
| 32030 | "double4 __ovld __cnfn convert_double4(float4);\n" |
| 32031 | "double4 __ovld __cnfn convert_double4(int4);\n" |
| 32032 | "double4 __ovld __cnfn convert_double4(long4);\n" |
| 32033 | "double4 __ovld __cnfn convert_double4(short4);\n" |
| 32034 | "double4 __ovld __cnfn convert_double4(uchar4);\n" |
| 32035 | "double4 __ovld __cnfn convert_double4(uint4);\n" |
| 32036 | "double4 __ovld __cnfn convert_double4(ulong4);\n" |
| 32037 | "double4 __ovld __cnfn convert_double4(ushort4);\n" |
| 32038 | "double4 __ovld __cnfn convert_double4_rte(char4);\n" |
| 32039 | "double4 __ovld __cnfn convert_double4_rte(double4);\n" |
| 32040 | "double4 __ovld __cnfn convert_double4_rte(float4);\n" |
| 32041 | "double4 __ovld __cnfn convert_double4_rte(int4);\n" |
| 32042 | "double4 __ovld __cnfn convert_double4_rte(long4);\n" |
| 32043 | "double4 __ovld __cnfn convert_double4_rte(short4);\n" |
| 32044 | "double4 __ovld __cnfn convert_double4_rte(uchar4);\n" |
| 32045 | "double4 __ovld __cnfn convert_double4_rte(uint4);\n" |
| 32046 | "double4 __ovld __cnfn convert_double4_rte(ulong4);\n" |
| 32047 | "double4 __ovld __cnfn convert_double4_rte(ushort4);\n" |
| 32048 | "double4 __ovld __cnfn convert_double4_rtn(char4);\n" |
| 32049 | "double4 __ovld __cnfn convert_double4_rtn(double4);\n" |
| 32050 | "double4 __ovld __cnfn convert_double4_rtn(float4);\n" |
| 32051 | "double4 __ovld __cnfn convert_double4_rtn(int4);\n" |
| 32052 | "double4 __ovld __cnfn convert_double4_rtn(long4);\n" |
| 32053 | "double4 __ovld __cnfn convert_double4_rtn(short4);\n" |
| 32054 | "double4 __ovld __cnfn convert_double4_rtn(uchar4);\n" |
| 32055 | "double4 __ovld __cnfn convert_double4_rtn(uint4);\n" |
| 32056 | "double4 __ovld __cnfn convert_double4_rtn(ulong4);\n" |
| 32057 | "double4 __ovld __cnfn convert_double4_rtn(ushort4);\n" |
| 32058 | "double4 __ovld __cnfn convert_double4_rtp(char4);\n" |
| 32059 | "double4 __ovld __cnfn convert_double4_rtp(double4);\n" |
| 32060 | "double4 __ovld __cnfn convert_double4_rtp(float4);\n" |
| 32061 | "double4 __ovld __cnfn convert_double4_rtp(int4);\n" |
| 32062 | "double4 __ovld __cnfn convert_double4_rtp(long4);\n" |
| 32063 | "double4 __ovld __cnfn convert_double4_rtp(short4);\n" |
| 32064 | "double4 __ovld __cnfn convert_double4_rtp(uchar4);\n" |
| 32065 | "double4 __ovld __cnfn convert_double4_rtp(uint4);\n" |
| 32066 | "double4 __ovld __cnfn convert_double4_rtp(ulong4);\n" |
| 32067 | "double4 __ovld __cnfn convert_double4_rtp(ushort4);\n" |
| 32068 | "double4 __ovld __cnfn convert_double4_rtz(char4);\n" |
| 32069 | "double4 __ovld __cnfn convert_double4_rtz(double4);\n" |
| 32070 | "double4 __ovld __cnfn convert_double4_rtz(float4);\n" |
| 32071 | "double4 __ovld __cnfn convert_double4_rtz(int4);\n" |
| 32072 | "double4 __ovld __cnfn convert_double4_rtz(long4);\n" |
| 32073 | "double4 __ovld __cnfn convert_double4_rtz(short4);\n" |
| 32074 | "double4 __ovld __cnfn convert_double4_rtz(uchar4);\n" |
| 32075 | "double4 __ovld __cnfn convert_double4_rtz(uint4);\n" |
| 32076 | "double4 __ovld __cnfn convert_double4_rtz(ulong4);\n" |
| 32077 | "double4 __ovld __cnfn convert_double4_rtz(ushort4);\n" |
| 32078 | "double8 __ovld __cnfn convert_double8(char8);\n" |
| 32079 | "double8 __ovld __cnfn convert_double8(double8);\n" |
| 32080 | "double8 __ovld __cnfn convert_double8(float8);\n" |
| 32081 | "double8 __ovld __cnfn convert_double8(int8);\n" |
| 32082 | "double8 __ovld __cnfn convert_double8(long8);\n" |
| 32083 | "double8 __ovld __cnfn convert_double8(short8);\n" |
| 32084 | "double8 __ovld __cnfn convert_double8(uchar8);\n" |
| 32085 | "double8 __ovld __cnfn convert_double8(uint8);\n" |
| 32086 | "double8 __ovld __cnfn convert_double8(ulong8);\n" |
| 32087 | "double8 __ovld __cnfn convert_double8(ushort8);\n" |
| 32088 | "double8 __ovld __cnfn convert_double8_rte(char8);\n" |
| 32089 | "double8 __ovld __cnfn convert_double8_rte(double8);\n" |
| 32090 | "double8 __ovld __cnfn convert_double8_rte(float8);\n" |
| 32091 | "double8 __ovld __cnfn convert_double8_rte(int8);\n" |
| 32092 | "double8 __ovld __cnfn convert_double8_rte(long8);\n" |
| 32093 | "double8 __ovld __cnfn convert_double8_rte(short8);\n" |
| 32094 | "double8 __ovld __cnfn convert_double8_rte(uchar8);\n" |
| 32095 | "double8 __ovld __cnfn convert_double8_rte(uint8);\n" |
| 32096 | "double8 __ovld __cnfn convert_double8_rte(ulong8);\n" |
| 32097 | "double8 __ovld __cnfn convert_double8_rte(ushort8);\n" |
| 32098 | "double8 __ovld __cnfn convert_double8_rtn(char8);\n" |
| 32099 | "double8 __ovld __cnfn convert_double8_rtn(double8);\n" |
| 32100 | "double8 __ovld __cnfn convert_double8_rtn(float8);\n" |
| 32101 | "double8 __ovld __cnfn convert_double8_rtn(int8);\n" |
| 32102 | "double8 __ovld __cnfn convert_double8_rtn(long8);\n" |
| 32103 | "double8 __ovld __cnfn convert_double8_rtn(short8);\n" |
| 32104 | "double8 __ovld __cnfn convert_double8_rtn(uchar8);\n" |
| 32105 | "double8 __ovld __cnfn convert_double8_rtn(uint8);\n" |
| 32106 | "double8 __ovld __cnfn convert_double8_rtn(ulong8);\n" |
| 32107 | "double8 __ovld __cnfn convert_double8_rtn(ushort8);\n" |
| 32108 | "double8 __ovld __cnfn convert_double8_rtp(char8);\n" |
| 32109 | "double8 __ovld __cnfn convert_double8_rtp(double8);\n" |
| 32110 | "double8 __ovld __cnfn convert_double8_rtp(float8);\n" |
| 32111 | "double8 __ovld __cnfn convert_double8_rtp(int8);\n" |
| 32112 | "double8 __ovld __cnfn convert_double8_rtp(long8);\n" |
| 32113 | "double8 __ovld __cnfn convert_double8_rtp(short8);\n" |
| 32114 | "double8 __ovld __cnfn convert_double8_rtp(uchar8);\n" |
| 32115 | "double8 __ovld __cnfn convert_double8_rtp(uint8);\n" |
| 32116 | "double8 __ovld __cnfn convert_double8_rtp(ulong8);\n" |
| 32117 | "double8 __ovld __cnfn convert_double8_rtp(ushort8);\n" |
| 32118 | "double8 __ovld __cnfn convert_double8_rtz(char8);\n" |
| 32119 | "double8 __ovld __cnfn convert_double8_rtz(double8);\n" |
| 32120 | "double8 __ovld __cnfn convert_double8_rtz(float8);\n" |
| 32121 | "double8 __ovld __cnfn convert_double8_rtz(int8);\n" |
| 32122 | "double8 __ovld __cnfn convert_double8_rtz(long8);\n" |
| 32123 | "double8 __ovld __cnfn convert_double8_rtz(short8);\n" |
| 32124 | "double8 __ovld __cnfn convert_double8_rtz(uchar8);\n" |
| 32125 | "double8 __ovld __cnfn convert_double8_rtz(uint8);\n" |
| 32126 | "double8 __ovld __cnfn convert_double8_rtz(ulong8);\n" |
| 32127 | "double8 __ovld __cnfn convert_double8_rtz(ushort8);\n" |
| 32128 | "double16 __ovld __cnfn convert_double16(char16);\n" |
| 32129 | "double16 __ovld __cnfn convert_double16(double16);\n" |
| 32130 | "double16 __ovld __cnfn convert_double16(float16);\n" |
| 32131 | "double16 __ovld __cnfn convert_double16(int16);\n" |
| 32132 | "double16 __ovld __cnfn convert_double16(long16);\n" |
| 32133 | "double16 __ovld __cnfn convert_double16(short16);\n" |
| 32134 | "double16 __ovld __cnfn convert_double16(uchar16);\n" |
| 32135 | "double16 __ovld __cnfn convert_double16(uint16);\n" |
| 32136 | "double16 __ovld __cnfn convert_double16(ulong16);\n" |
| 32137 | "double16 __ovld __cnfn convert_double16(ushort16);\n" |
| 32138 | "double16 __ovld __cnfn convert_double16_rte(char16);\n" |
| 32139 | "double16 __ovld __cnfn convert_double16_rte(double16);\n" |
| 32140 | "double16 __ovld __cnfn convert_double16_rte(float16);\n" |
| 32141 | "double16 __ovld __cnfn convert_double16_rte(int16);\n" |
| 32142 | "double16 __ovld __cnfn convert_double16_rte(long16);\n" |
| 32143 | "double16 __ovld __cnfn convert_double16_rte(short16);\n" |
| 32144 | "double16 __ovld __cnfn convert_double16_rte(uchar16);\n" |
| 32145 | "double16 __ovld __cnfn convert_double16_rte(uint16);\n" |
| 32146 | "double16 __ovld __cnfn convert_double16_rte(ulong16);\n" |
| 32147 | "double16 __ovld __cnfn convert_double16_rte(ushort16);\n" |
| 32148 | "double16 __ovld __cnfn convert_double16_rtn(char16);\n" |
| 32149 | "double16 __ovld __cnfn convert_double16_rtn(double16);\n" |
| 32150 | "double16 __ovld __cnfn convert_double16_rtn(float16);\n" |
| 32151 | "double16 __ovld __cnfn convert_double16_rtn(int16);\n" |
| 32152 | "double16 __ovld __cnfn convert_double16_rtn(long16);\n" |
| 32153 | "double16 __ovld __cnfn convert_double16_rtn(short16);\n" |
| 32154 | "double16 __ovld __cnfn convert_double16_rtn(uchar16);\n" |
| 32155 | "double16 __ovld __cnfn convert_double16_rtn(uint16);\n" |
| 32156 | "double16 __ovld __cnfn convert_double16_rtn(ulong16);\n" |
| 32157 | "double16 __ovld __cnfn convert_double16_rtn(ushort16);\n" |
| 32158 | "double16 __ovld __cnfn convert_double16_rtp(char16);\n" |
| 32159 | "double16 __ovld __cnfn convert_double16_rtp(double16);\n" |
| 32160 | "double16 __ovld __cnfn convert_double16_rtp(float16);\n" |
| 32161 | "double16 __ovld __cnfn convert_double16_rtp(int16);\n" |
| 32162 | "double16 __ovld __cnfn convert_double16_rtp(long16);\n" |
| 32163 | "double16 __ovld __cnfn convert_double16_rtp(short16);\n" |
| 32164 | "double16 __ovld __cnfn convert_double16_rtp(uchar16);\n" |
| 32165 | "double16 __ovld __cnfn convert_double16_rtp(uint16);\n" |
| 32166 | "double16 __ovld __cnfn convert_double16_rtp(ulong16);\n" |
| 32167 | "double16 __ovld __cnfn convert_double16_rtp(ushort16);\n" |
| 32168 | "double16 __ovld __cnfn convert_double16_rtz(char16);\n" |
| 32169 | "double16 __ovld __cnfn convert_double16_rtz(double16);\n" |
| 32170 | "double16 __ovld __cnfn convert_double16_rtz(float16);\n" |
| 32171 | "double16 __ovld __cnfn convert_double16_rtz(int16);\n" |
| 32172 | "double16 __ovld __cnfn convert_double16_rtz(long16);\n" |
| 32173 | "double16 __ovld __cnfn convert_double16_rtz(short16);\n" |
| 32174 | "double16 __ovld __cnfn convert_double16_rtz(uchar16);\n" |
| 32175 | "double16 __ovld __cnfn convert_double16_rtz(uint16);\n" |
| 32176 | "double16 __ovld __cnfn convert_double16_rtz(ulong16);\n" |
| 32177 | "double16 __ovld __cnfn convert_double16_rtz(ushort16);\n" |
| 32178 | "#endif //cl_khr_fp64\n" |
| 32179 | "\n" |
| 32180 | "#ifdef cl_khr_fp16\n" |
| 32181 | "// Convert half types to non-double types.\n" |
| 32182 | "uchar __ovld __cnfn convert_uchar(half);\n" |
| 32183 | "uchar __ovld __cnfn convert_uchar_rte(half);\n" |
| 32184 | "uchar __ovld __cnfn convert_uchar_rtp(half);\n" |
| 32185 | "uchar __ovld __cnfn convert_uchar_rtn(half);\n" |
| 32186 | "uchar __ovld __cnfn convert_uchar_rtz(half);\n" |
| 32187 | "uchar __ovld __cnfn convert_uchar_sat(half);\n" |
| 32188 | "uchar __ovld __cnfn convert_uchar_sat_rte(half);\n" |
| 32189 | "uchar __ovld __cnfn convert_uchar_sat_rtp(half);\n" |
| 32190 | "uchar __ovld __cnfn convert_uchar_sat_rtn(half);\n" |
| 32191 | "uchar __ovld __cnfn convert_uchar_sat_rtz(half);\n" |
| 32192 | "uchar2 __ovld __cnfn convert_uchar2(half2);\n" |
| 32193 | "uchar2 __ovld __cnfn convert_uchar2_rte(half2);\n" |
| 32194 | "uchar2 __ovld __cnfn convert_uchar2_rtp(half2);\n" |
| 32195 | "uchar2 __ovld __cnfn convert_uchar2_rtn(half2);\n" |
| 32196 | "uchar2 __ovld __cnfn convert_uchar2_rtz(half2);\n" |
| 32197 | "uchar2 __ovld __cnfn convert_uchar2_sat(half2);\n" |
| 32198 | "uchar2 __ovld __cnfn convert_uchar2_sat_rte(half2);\n" |
| 32199 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtp(half2);\n" |
| 32200 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtn(half2);\n" |
| 32201 | "uchar2 __ovld __cnfn convert_uchar2_sat_rtz(half2);\n" |
| 32202 | "uchar3 __ovld __cnfn convert_uchar3(half3);\n" |
| 32203 | "uchar3 __ovld __cnfn convert_uchar3_rte(half3);\n" |
| 32204 | "uchar3 __ovld __cnfn convert_uchar3_rtp(half3);\n" |
| 32205 | "uchar3 __ovld __cnfn convert_uchar3_rtn(half3);\n" |
| 32206 | "uchar3 __ovld __cnfn convert_uchar3_rtz(half3);\n" |
| 32207 | "uchar3 __ovld __cnfn convert_uchar3_sat(half3);\n" |
| 32208 | "uchar3 __ovld __cnfn convert_uchar3_sat_rte(half3);\n" |
| 32209 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtp(half3);\n" |
| 32210 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtn(half3);\n" |
| 32211 | "uchar3 __ovld __cnfn convert_uchar3_sat_rtz(half3);\n" |
| 32212 | "uchar4 __ovld __cnfn convert_uchar4(half4);\n" |
| 32213 | "uchar4 __ovld __cnfn convert_uchar4_rte(half4);\n" |
| 32214 | "uchar4 __ovld __cnfn convert_uchar4_rtp(half4);\n" |
| 32215 | "uchar4 __ovld __cnfn convert_uchar4_rtn(half4);\n" |
| 32216 | "uchar4 __ovld __cnfn convert_uchar4_rtz(half4);\n" |
| 32217 | "uchar4 __ovld __cnfn convert_uchar4_sat(half4);\n" |
| 32218 | "uchar4 __ovld __cnfn convert_uchar4_sat_rte(half4);\n" |
| 32219 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtp(half4);\n" |
| 32220 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtn(half4);\n" |
| 32221 | "uchar4 __ovld __cnfn convert_uchar4_sat_rtz(half4);\n" |
| 32222 | "uchar8 __ovld __cnfn convert_uchar8(half8);\n" |
| 32223 | "uchar8 __ovld __cnfn convert_uchar8_rte(half8);\n" |
| 32224 | "uchar8 __ovld __cnfn convert_uchar8_rtp(half8);\n" |
| 32225 | "uchar8 __ovld __cnfn convert_uchar8_rtn(half8);\n" |
| 32226 | "uchar8 __ovld __cnfn convert_uchar8_rtz(half8);\n" |
| 32227 | "uchar8 __ovld __cnfn convert_uchar8_sat(half8);\n" |
| 32228 | "uchar8 __ovld __cnfn convert_uchar8_sat_rte(half8);\n" |
| 32229 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtp(half8);\n" |
| 32230 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtn(half8);\n" |
| 32231 | "uchar8 __ovld __cnfn convert_uchar8_sat_rtz(half8);\n" |
| 32232 | "uchar16 __ovld __cnfn convert_uchar16(half16);\n" |
| 32233 | "uchar16 __ovld __cnfn convert_uchar16_rte(half16);\n" |
| 32234 | "uchar16 __ovld __cnfn convert_uchar16_rtp(half16);\n" |
| 32235 | "uchar16 __ovld __cnfn convert_uchar16_rtn(half16);\n" |
| 32236 | "uchar16 __ovld __cnfn convert_uchar16_rtz(half16);\n" |
| 32237 | "uchar16 __ovld __cnfn convert_uchar16_sat(half16);\n" |
| 32238 | "uchar16 __ovld __cnfn convert_uchar16_sat_rte(half16);\n" |
| 32239 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtp(half16);\n" |
| 32240 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtn(half16);\n" |
| 32241 | "uchar16 __ovld __cnfn convert_uchar16_sat_rtz(half16);\n" |
| 32242 | "ushort __ovld __cnfn convert_ushort(half);\n" |
| 32243 | "ushort __ovld __cnfn convert_ushort_rte(half);\n" |
| 32244 | "ushort __ovld __cnfn convert_ushort_rtp(half);\n" |
| 32245 | "ushort __ovld __cnfn convert_ushort_rtn(half);\n" |
| 32246 | "ushort __ovld __cnfn convert_ushort_rtz(half);\n" |
| 32247 | "ushort __ovld __cnfn convert_ushort_sat(half);\n" |
| 32248 | "ushort __ovld __cnfn convert_ushort_sat_rte(half);\n" |
| 32249 | "ushort __ovld __cnfn convert_ushort_sat_rtp(half);\n" |
| 32250 | "ushort __ovld __cnfn convert_ushort_sat_rtn(half);\n" |
| 32251 | "ushort __ovld __cnfn convert_ushort_sat_rtz(half);\n" |
| 32252 | "ushort2 __ovld __cnfn convert_ushort2(half2);\n" |
| 32253 | "ushort2 __ovld __cnfn convert_ushort2_rte(half2);\n" |
| 32254 | "ushort2 __ovld __cnfn convert_ushort2_rtp(half2);\n" |
| 32255 | "ushort2 __ovld __cnfn convert_ushort2_rtn(half2);\n" |
| 32256 | "ushort2 __ovld __cnfn convert_ushort2_rtz(half2);\n" |
| 32257 | "ushort2 __ovld __cnfn convert_ushort2_sat(half2);\n" |
| 32258 | "ushort2 __ovld __cnfn convert_ushort2_sat_rte(half2);\n" |
| 32259 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtp(half2);\n" |
| 32260 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtn(half2);\n" |
| 32261 | "ushort2 __ovld __cnfn convert_ushort2_sat_rtz(half2);\n" |
| 32262 | "ushort3 __ovld __cnfn convert_ushort3(half3);\n" |
| 32263 | "ushort3 __ovld __cnfn convert_ushort3_rte(half3);\n" |
| 32264 | "ushort3 __ovld __cnfn convert_ushort3_rtp(half3);\n" |
| 32265 | "ushort3 __ovld __cnfn convert_ushort3_rtn(half3);\n" |
| 32266 | "ushort3 __ovld __cnfn convert_ushort3_rtz(half3);\n" |
| 32267 | "ushort3 __ovld __cnfn convert_ushort3_sat(half3);\n" |
| 32268 | "ushort3 __ovld __cnfn convert_ushort3_sat_rte(half3);\n" |
| 32269 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtp(half3);\n" |
| 32270 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtn(half3);\n" |
| 32271 | "ushort3 __ovld __cnfn convert_ushort3_sat_rtz(half3);\n" |
| 32272 | "ushort4 __ovld __cnfn convert_ushort4(half4);\n" |
| 32273 | "ushort4 __ovld __cnfn convert_ushort4_rte(half4);\n" |
| 32274 | "ushort4 __ovld __cnfn convert_ushort4_rtp(half4);\n" |
| 32275 | "ushort4 __ovld __cnfn convert_ushort4_rtn(half4);\n" |
| 32276 | "ushort4 __ovld __cnfn convert_ushort4_rtz(half4);\n" |
| 32277 | "ushort4 __ovld __cnfn convert_ushort4_sat(half4);\n" |
| 32278 | "ushort4 __ovld __cnfn convert_ushort4_sat_rte(half4);\n" |
| 32279 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtp(half4);\n" |
| 32280 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtn(half4);\n" |
| 32281 | "ushort4 __ovld __cnfn convert_ushort4_sat_rtz(half4);\n" |
| 32282 | "ushort8 __ovld __cnfn convert_ushort8(half8);\n" |
| 32283 | "ushort8 __ovld __cnfn convert_ushort8_rte(half8);\n" |
| 32284 | "ushort8 __ovld __cnfn convert_ushort8_rtp(half8);\n" |
| 32285 | "ushort8 __ovld __cnfn convert_ushort8_rtn(half8);\n" |
| 32286 | "ushort8 __ovld __cnfn convert_ushort8_rtz(half8);\n" |
| 32287 | "ushort8 __ovld __cnfn convert_ushort8_sat(half8);\n" |
| 32288 | "ushort8 __ovld __cnfn convert_ushort8_sat_rte(half8);\n" |
| 32289 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtp(half8);\n" |
| 32290 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtn(half8);\n" |
| 32291 | "ushort8 __ovld __cnfn convert_ushort8_sat_rtz(half8);\n" |
| 32292 | "ushort16 __ovld __cnfn convert_ushort16(half16);\n" |
| 32293 | "ushort16 __ovld __cnfn convert_ushort16_rte(half16);\n" |
| 32294 | "ushort16 __ovld __cnfn convert_ushort16_rtp(half16);\n" |
| 32295 | "ushort16 __ovld __cnfn convert_ushort16_rtn(half16);\n" |
| 32296 | "ushort16 __ovld __cnfn convert_ushort16_rtz(half16);\n" |
| 32297 | "ushort16 __ovld __cnfn convert_ushort16_sat(half16);\n" |
| 32298 | "ushort16 __ovld __cnfn convert_ushort16_sat_rte(half16);\n" |
| 32299 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtp(half16);\n" |
| 32300 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtn(half16);\n" |
| 32301 | "ushort16 __ovld __cnfn convert_ushort16_sat_rtz(half16);\n" |
| 32302 | "uint __ovld __cnfn convert_uint(half);\n" |
| 32303 | "uint __ovld __cnfn convert_uint_rte(half);\n" |
| 32304 | "uint __ovld __cnfn convert_uint_rtp(half);\n" |
| 32305 | "uint __ovld __cnfn convert_uint_rtn(half);\n" |
| 32306 | "uint __ovld __cnfn convert_uint_rtz(half);\n" |
| 32307 | "uint __ovld __cnfn convert_uint_sat(half);\n" |
| 32308 | "uint __ovld __cnfn convert_uint_sat_rte(half);\n" |
| 32309 | "uint __ovld __cnfn convert_uint_sat_rtp(half);\n" |
| 32310 | "uint __ovld __cnfn convert_uint_sat_rtn(half);\n" |
| 32311 | "uint __ovld __cnfn convert_uint_sat_rtz(half);\n" |
| 32312 | "uint2 __ovld __cnfn convert_uint2(half2);\n" |
| 32313 | "uint2 __ovld __cnfn convert_uint2_rte(half2);\n" |
| 32314 | "uint2 __ovld __cnfn convert_uint2_rtp(half2);\n" |
| 32315 | "uint2 __ovld __cnfn convert_uint2_rtn(half2);\n" |
| 32316 | "uint2 __ovld __cnfn convert_uint2_rtz(half2);\n" |
| 32317 | "uint2 __ovld __cnfn convert_uint2_sat(half2);\n" |
| 32318 | "uint2 __ovld __cnfn convert_uint2_sat_rte(half2);\n" |
| 32319 | "uint2 __ovld __cnfn convert_uint2_sat_rtp(half2);\n" |
| 32320 | "uint2 __ovld __cnfn convert_uint2_sat_rtn(half2);\n" |
| 32321 | "uint2 __ovld __cnfn convert_uint2_sat_rtz(half2);\n" |
| 32322 | "uint3 __ovld __cnfn convert_uint3(half3);\n" |
| 32323 | "uint3 __ovld __cnfn convert_uint3_rte(half3);\n" |
| 32324 | "uint3 __ovld __cnfn convert_uint3_rtp(half3);\n" |
| 32325 | "uint3 __ovld __cnfn convert_uint3_rtn(half3);\n" |
| 32326 | "uint3 __ovld __cnfn convert_uint3_rtz(half3);\n" |
| 32327 | "uint3 __ovld __cnfn convert_uint3_sat(half3);\n" |
| 32328 | "uint3 __ovld __cnfn convert_uint3_sat_rte(half3);\n" |
| 32329 | "uint3 __ovld __cnfn convert_uint3_sat_rtp(half3);\n" |
| 32330 | "uint3 __ovld __cnfn convert_uint3_sat_rtn(half3);\n" |
| 32331 | "uint3 __ovld __cnfn convert_uint3_sat_rtz(half3);\n" |
| 32332 | "uint4 __ovld __cnfn convert_uint4(half4);\n" |
| 32333 | "uint4 __ovld __cnfn convert_uint4_rte(half4);\n" |
| 32334 | "uint4 __ovld __cnfn convert_uint4_rtp(half4);\n" |
| 32335 | "uint4 __ovld __cnfn convert_uint4_rtn(half4);\n" |
| 32336 | "uint4 __ovld __cnfn convert_uint4_rtz(half4);\n" |
| 32337 | "uint4 __ovld __cnfn convert_uint4_sat(half4);\n" |
| 32338 | "uint4 __ovld __cnfn convert_uint4_sat_rte(half4);\n" |
| 32339 | "uint4 __ovld __cnfn convert_uint4_sat_rtp(half4);\n" |
| 32340 | "uint4 __ovld __cnfn convert_uint4_sat_rtn(half4);\n" |
| 32341 | "uint4 __ovld __cnfn convert_uint4_sat_rtz(half4);\n" |
| 32342 | "uint8 __ovld __cnfn convert_uint8(half8);\n" |
| 32343 | "uint8 __ovld __cnfn convert_uint8_rte(half8);\n" |
| 32344 | "uint8 __ovld __cnfn convert_uint8_rtp(half8);\n" |
| 32345 | "uint8 __ovld __cnfn convert_uint8_rtn(half8);\n" |
| 32346 | "uint8 __ovld __cnfn convert_uint8_rtz(half8);\n" |
| 32347 | "uint8 __ovld __cnfn convert_uint8_sat(half8);\n" |
| 32348 | "uint8 __ovld __cnfn convert_uint8_sat_rte(half8);\n" |
| 32349 | "uint8 __ovld __cnfn convert_uint8_sat_rtp(half8);\n" |
| 32350 | "uint8 __ovld __cnfn convert_uint8_sat_rtn(half8);\n" |
| 32351 | "uint8 __ovld __cnfn convert_uint8_sat_rtz(half8);\n" |
| 32352 | "uint16 __ovld __cnfn convert_uint16(half16);\n" |
| 32353 | "uint16 __ovld __cnfn convert_uint16_rte(half16);\n" |
| 32354 | "uint16 __ovld __cnfn convert_uint16_rtp(half16);\n" |
| 32355 | "uint16 __ovld __cnfn convert_uint16_rtn(half16);\n" |
| 32356 | "uint16 __ovld __cnfn convert_uint16_rtz(half16);\n" |
| 32357 | "uint16 __ovld __cnfn convert_uint16_sat(half16);\n" |
| 32358 | "uint16 __ovld __cnfn convert_uint16_sat_rte(half16);\n" |
| 32359 | "uint16 __ovld __cnfn convert_uint16_sat_rtp(half16);\n" |
| 32360 | "uint16 __ovld __cnfn convert_uint16_sat_rtn(half16);\n" |
| 32361 | "uint16 __ovld __cnfn convert_uint16_sat_rtz(half16);\n" |
| 32362 | "ulong __ovld __cnfn convert_ulong(half);\n" |
| 32363 | "ulong __ovld __cnfn convert_ulong_rte(half);\n" |
| 32364 | "ulong __ovld __cnfn convert_ulong_rtp(half);\n" |
| 32365 | "ulong __ovld __cnfn convert_ulong_rtn(half);\n" |
| 32366 | "ulong __ovld __cnfn convert_ulong_rtz(half);\n" |
| 32367 | "ulong __ovld __cnfn convert_ulong_sat(half);\n" |
| 32368 | "ulong __ovld __cnfn convert_ulong_sat_rte(half);\n" |
| 32369 | "ulong __ovld __cnfn convert_ulong_sat_rtp(half);\n" |
| 32370 | "ulong __ovld __cnfn convert_ulong_sat_rtn(half);\n" |
| 32371 | "ulong __ovld __cnfn convert_ulong_sat_rtz(half);\n" |
| 32372 | "ulong2 __ovld __cnfn convert_ulong2(half2);\n" |
| 32373 | "ulong2 __ovld __cnfn convert_ulong2_rte(half2);\n" |
| 32374 | "ulong2 __ovld __cnfn convert_ulong2_rtp(half2);\n" |
| 32375 | "ulong2 __ovld __cnfn convert_ulong2_rtn(half2);\n" |
| 32376 | "ulong2 __ovld __cnfn convert_ulong2_rtz(half2);\n" |
| 32377 | "ulong2 __ovld __cnfn convert_ulong2_sat(half2);\n" |
| 32378 | "ulong2 __ovld __cnfn convert_ulong2_sat_rte(half2);\n" |
| 32379 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtp(half2);\n" |
| 32380 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtn(half2);\n" |
| 32381 | "ulong2 __ovld __cnfn convert_ulong2_sat_rtz(half2);\n" |
| 32382 | "ulong3 __ovld __cnfn convert_ulong3(half3);\n" |
| 32383 | "ulong3 __ovld __cnfn convert_ulong3_rte(half3);\n" |
| 32384 | "ulong3 __ovld __cnfn convert_ulong3_rtp(half3);\n" |
| 32385 | "ulong3 __ovld __cnfn convert_ulong3_rtn(half3);\n" |
| 32386 | "ulong3 __ovld __cnfn convert_ulong3_rtz(half3);\n" |
| 32387 | "ulong3 __ovld __cnfn convert_ulong3_sat(half3);\n" |
| 32388 | "ulong3 __ovld __cnfn convert_ulong3_sat_rte(half3);\n" |
| 32389 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtp(half3);\n" |
| 32390 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtn(half3);\n" |
| 32391 | "ulong3 __ovld __cnfn convert_ulong3_sat_rtz(half3);\n" |
| 32392 | "ulong4 __ovld __cnfn convert_ulong4(half4);\n" |
| 32393 | "ulong4 __ovld __cnfn convert_ulong4_rte(half4);\n" |
| 32394 | "ulong4 __ovld __cnfn convert_ulong4_rtp(half4);\n" |
| 32395 | "ulong4 __ovld __cnfn convert_ulong4_rtn(half4);\n" |
| 32396 | "ulong4 __ovld __cnfn convert_ulong4_rtz(half4);\n" |
| 32397 | "ulong4 __ovld __cnfn convert_ulong4_sat(half4);\n" |
| 32398 | "ulong4 __ovld __cnfn convert_ulong4_sat_rte(half4);\n" |
| 32399 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtp(half4);\n" |
| 32400 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtn(half4);\n" |
| 32401 | "ulong4 __ovld __cnfn convert_ulong4_sat_rtz(half4);\n" |
| 32402 | "ulong8 __ovld __cnfn convert_ulong8(half8);\n" |
| 32403 | "ulong8 __ovld __cnfn convert_ulong8_rte(half8);\n" |
| 32404 | "ulong8 __ovld __cnfn convert_ulong8_rtp(half8);\n" |
| 32405 | "ulong8 __ovld __cnfn convert_ulong8_rtn(half8);\n" |
| 32406 | "ulong8 __ovld __cnfn convert_ulong8_rtz(half8);\n" |
| 32407 | "ulong8 __ovld __cnfn convert_ulong8_sat(half8);\n" |
| 32408 | "ulong8 __ovld __cnfn convert_ulong8_sat_rte(half8);\n" |
| 32409 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtp(half8);\n" |
| 32410 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtn(half8);\n" |
| 32411 | "ulong8 __ovld __cnfn convert_ulong8_sat_rtz(half8);\n" |
| 32412 | "ulong16 __ovld __cnfn convert_ulong16(half16);\n" |
| 32413 | "ulong16 __ovld __cnfn convert_ulong16_rte(half16);\n" |
| 32414 | "ulong16 __ovld __cnfn convert_ulong16_rtp(half16);\n" |
| 32415 | "ulong16 __ovld __cnfn convert_ulong16_rtn(half16);\n" |
| 32416 | "ulong16 __ovld __cnfn convert_ulong16_rtz(half16);\n" |
| 32417 | "ulong16 __ovld __cnfn convert_ulong16_sat(half16);\n" |
| 32418 | "ulong16 __ovld __cnfn convert_ulong16_sat_rte(half16);\n" |
| 32419 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtp(half16);\n" |
| 32420 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtn(half16);\n" |
| 32421 | "ulong16 __ovld __cnfn convert_ulong16_sat_rtz(half16);\n" |
| 32422 | "char __ovld __cnfn convert_char(half);\n" |
| 32423 | "char __ovld __cnfn convert_char_rte(half);\n" |
| 32424 | "char __ovld __cnfn convert_char_rtp(half);\n" |
| 32425 | "char __ovld __cnfn convert_char_rtn(half);\n" |
| 32426 | "char __ovld __cnfn convert_char_rtz(half);\n" |
| 32427 | "char __ovld __cnfn convert_char_sat(half);\n" |
| 32428 | "char __ovld __cnfn convert_char_sat_rte(half);\n" |
| 32429 | "char __ovld __cnfn convert_char_sat_rtp(half);\n" |
| 32430 | "char __ovld __cnfn convert_char_sat_rtn(half);\n" |
| 32431 | "char __ovld __cnfn convert_char_sat_rtz(half);\n" |
| 32432 | "char2 __ovld __cnfn convert_char2(half2);\n" |
| 32433 | "char2 __ovld __cnfn convert_char2_rte(half2);\n" |
| 32434 | "char2 __ovld __cnfn convert_char2_rtp(half2);\n" |
| 32435 | "char2 __ovld __cnfn convert_char2_rtn(half2);\n" |
| 32436 | "char2 __ovld __cnfn convert_char2_rtz(half2);\n" |
| 32437 | "char2 __ovld __cnfn convert_char2_sat(half2);\n" |
| 32438 | "char2 __ovld __cnfn convert_char2_sat_rte(half2);\n" |
| 32439 | "char2 __ovld __cnfn convert_char2_sat_rtp(half2);\n" |
| 32440 | "char2 __ovld __cnfn convert_char2_sat_rtn(half2);\n" |
| 32441 | "char2 __ovld __cnfn convert_char2_sat_rtz(half2);\n" |
| 32442 | "char3 __ovld __cnfn convert_char3(half3);\n" |
| 32443 | "char3 __ovld __cnfn convert_char3_rte(half3);\n" |
| 32444 | "char3 __ovld __cnfn convert_char3_rtp(half3);\n" |
| 32445 | "char3 __ovld __cnfn convert_char3_rtn(half3);\n" |
| 32446 | "char3 __ovld __cnfn convert_char3_rtz(half3);\n" |
| 32447 | "char3 __ovld __cnfn convert_char3_sat(half3);\n" |
| 32448 | "char3 __ovld __cnfn convert_char3_sat_rte(half3);\n" |
| 32449 | "char3 __ovld __cnfn convert_char3_sat_rtp(half3);\n" |
| 32450 | "char3 __ovld __cnfn convert_char3_sat_rtn(half3);\n" |
| 32451 | "char3 __ovld __cnfn convert_char3_sat_rtz(half3);\n" |
| 32452 | "char4 __ovld __cnfn convert_char4(half4);\n" |
| 32453 | "char4 __ovld __cnfn convert_char4_rte(half4);\n" |
| 32454 | "char4 __ovld __cnfn convert_char4_rtp(half4);\n" |
| 32455 | "char4 __ovld __cnfn convert_char4_rtn(half4);\n" |
| 32456 | "char4 __ovld __cnfn convert_char4_rtz(half4);\n" |
| 32457 | "char4 __ovld __cnfn convert_char4_sat(half4);\n" |
| 32458 | "char4 __ovld __cnfn convert_char4_sat_rte(half4);\n" |
| 32459 | "char4 __ovld __cnfn convert_char4_sat_rtp(half4);\n" |
| 32460 | "char4 __ovld __cnfn convert_char4_sat_rtn(half4);\n" |
| 32461 | "char4 __ovld __cnfn convert_char4_sat_rtz(half4);\n" |
| 32462 | "char8 __ovld __cnfn convert_char8(half8);\n" |
| 32463 | "char8 __ovld __cnfn convert_char8_rte(half8);\n" |
| 32464 | "char8 __ovld __cnfn convert_char8_rtp(half8);\n" |
| 32465 | "char8 __ovld __cnfn convert_char8_rtn(half8);\n" |
| 32466 | "char8 __ovld __cnfn convert_char8_rtz(half8);\n" |
| 32467 | "char8 __ovld __cnfn convert_char8_sat(half8);\n" |
| 32468 | "char8 __ovld __cnfn convert_char8_sat_rte(half8);\n" |
| 32469 | "char8 __ovld __cnfn convert_char8_sat_rtp(half8);\n" |
| 32470 | "char8 __ovld __cnfn convert_char8_sat_rtn(half8);\n" |
| 32471 | "char8 __ovld __cnfn convert_char8_sat_rtz(half8);\n" |
| 32472 | "char16 __ovld __cnfn convert_char16(half16);\n" |
| 32473 | "char16 __ovld __cnfn convert_char16_rte(half16);\n" |
| 32474 | "char16 __ovld __cnfn convert_char16_rtp(half16);\n" |
| 32475 | "char16 __ovld __cnfn convert_char16_rtn(half16);\n" |
| 32476 | "char16 __ovld __cnfn convert_char16_rtz(half16);\n" |
| 32477 | "char16 __ovld __cnfn convert_char16_sat(half16);\n" |
| 32478 | "char16 __ovld __cnfn convert_char16_sat_rte(half16);\n" |
| 32479 | "char16 __ovld __cnfn convert_char16_sat_rtp(half16);\n" |
| 32480 | "char16 __ovld __cnfn convert_char16_sat_rtn(half16);\n" |
| 32481 | "char16 __ovld __cnfn convert_char16_sat_rtz(half16);\n" |
| 32482 | "short __ovld __cnfn convert_short(half);\n" |
| 32483 | "short __ovld __cnfn convert_short_rte(half);\n" |
| 32484 | "short __ovld __cnfn convert_short_rtp(half);\n" |
| 32485 | "short __ovld __cnfn convert_short_rtn(half);\n" |
| 32486 | "short __ovld __cnfn convert_short_rtz(half);\n" |
| 32487 | "short __ovld __cnfn convert_short_sat(half);\n" |
| 32488 | "short __ovld __cnfn convert_short_sat_rte(half);\n" |
| 32489 | "short __ovld __cnfn convert_short_sat_rtp(half);\n" |
| 32490 | "short __ovld __cnfn convert_short_sat_rtn(half);\n" |
| 32491 | "short __ovld __cnfn convert_short_sat_rtz(half);\n" |
| 32492 | "short2 __ovld __cnfn convert_short2(half2);\n" |
| 32493 | "short2 __ovld __cnfn convert_short2_rte(half2);\n" |
| 32494 | "short2 __ovld __cnfn convert_short2_rtp(half2);\n" |
| 32495 | "short2 __ovld __cnfn convert_short2_rtn(half2);\n" |
| 32496 | "short2 __ovld __cnfn convert_short2_rtz(half2);\n" |
| 32497 | "short2 __ovld __cnfn convert_short2_sat(half2);\n" |
| 32498 | "short2 __ovld __cnfn convert_short2_sat_rte(half2);\n" |
| 32499 | "short2 __ovld __cnfn convert_short2_sat_rtp(half2);\n" |
| 32500 | "short2 __ovld __cnfn convert_short2_sat_rtn(half2);\n" |
| 32501 | "short2 __ovld __cnfn convert_short2_sat_rtz(half2);\n" |
| 32502 | "short3 __ovld __cnfn convert_short3(half3);\n" |
| 32503 | "short3 __ovld __cnfn convert_short3_rte(half3);\n" |
| 32504 | "short3 __ovld __cnfn convert_short3_rtp(half3);\n" |
| 32505 | "short3 __ovld __cnfn convert_short3_rtn(half3);\n" |
| 32506 | "short3 __ovld __cnfn convert_short3_rtz(half3);\n" |
| 32507 | "short3 __ovld __cnfn convert_short3_sat(half3);\n" |
| 32508 | "short3 __ovld __cnfn convert_short3_sat_rte(half3);\n" |
| 32509 | "short3 __ovld __cnfn convert_short3_sat_rtp(half3);\n" |
| 32510 | "short3 __ovld __cnfn convert_short3_sat_rtn(half3);\n" |
| 32511 | "short3 __ovld __cnfn convert_short3_sat_rtz(half3);\n" |
| 32512 | "short4 __ovld __cnfn convert_short4(half4);\n" |
| 32513 | "short4 __ovld __cnfn convert_short4_rte(half4);\n" |
| 32514 | "short4 __ovld __cnfn convert_short4_rtp(half4);\n" |
| 32515 | "short4 __ovld __cnfn convert_short4_rtn(half4);\n" |
| 32516 | "short4 __ovld __cnfn convert_short4_rtz(half4);\n" |
| 32517 | "short4 __ovld __cnfn convert_short4_sat(half4);\n" |
| 32518 | "short4 __ovld __cnfn convert_short4_sat_rte(half4);\n" |
| 32519 | "short4 __ovld __cnfn convert_short4_sat_rtp(half4);\n" |
| 32520 | "short4 __ovld __cnfn convert_short4_sat_rtn(half4);\n" |
| 32521 | "short4 __ovld __cnfn convert_short4_sat_rtz(half4);\n" |
| 32522 | "short8 __ovld __cnfn convert_short8(half8);\n" |
| 32523 | "short8 __ovld __cnfn convert_short8_rte(half8);\n" |
| 32524 | "short8 __ovld __cnfn convert_short8_rtp(half8);\n" |
| 32525 | "short8 __ovld __cnfn convert_short8_rtn(half8);\n" |
| 32526 | "short8 __ovld __cnfn convert_short8_rtz(half8);\n" |
| 32527 | "short8 __ovld __cnfn convert_short8_sat(half8);\n" |
| 32528 | "short8 __ovld __cnfn convert_short8_sat_rte(half8);\n" |
| 32529 | "short8 __ovld __cnfn convert_short8_sat_rtp(half8);\n" |
| 32530 | "short8 __ovld __cnfn convert_short8_sat_rtn(half8);\n" |
| 32531 | "short8 __ovld __cnfn convert_short8_sat_rtz(half8);\n" |
| 32532 | "short16 __ovld __cnfn convert_short16(half16);\n" |
| 32533 | "short16 __ovld __cnfn convert_short16_rte(half16);\n" |
| 32534 | "short16 __ovld __cnfn convert_short16_rtp(half16);\n" |
| 32535 | "short16 __ovld __cnfn convert_short16_rtn(half16);\n" |
| 32536 | "short16 __ovld __cnfn convert_short16_rtz(half16);\n" |
| 32537 | "short16 __ovld __cnfn convert_short16_sat(half16);\n" |
| 32538 | "short16 __ovld __cnfn convert_short16_sat_rte(half16);\n" |
| 32539 | "short16 __ovld __cnfn convert_short16_sat_rtp(half16);\n" |
| 32540 | "short16 __ovld __cnfn convert_short16_sat_rtn(half16);\n" |
| 32541 | "short16 __ovld __cnfn convert_short16_sat_rtz(half16);\n" |
| 32542 | "int __ovld __cnfn convert_int(half);\n" |
| 32543 | "int __ovld __cnfn convert_int_rte(half);\n" |
| 32544 | "int __ovld __cnfn convert_int_rtp(half);\n" |
| 32545 | "int __ovld __cnfn convert_int_rtn(half);\n" |
| 32546 | "int __ovld __cnfn convert_int_rtz(half);\n" |
| 32547 | "int __ovld __cnfn convert_int_sat(half);\n" |
| 32548 | "int __ovld __cnfn convert_int_sat_rte(half);\n" |
| 32549 | "int __ovld __cnfn convert_int_sat_rtp(half);\n" |
| 32550 | "int __ovld __cnfn convert_int_sat_rtn(half);\n" |
| 32551 | "int __ovld __cnfn convert_int_sat_rtz(half);\n" |
| 32552 | "int2 __ovld __cnfn convert_int2(half2);\n" |
| 32553 | "int2 __ovld __cnfn convert_int2_rte(half2);\n" |
| 32554 | "int2 __ovld __cnfn convert_int2_rtp(half2);\n" |
| 32555 | "int2 __ovld __cnfn convert_int2_rtn(half2);\n" |
| 32556 | "int2 __ovld __cnfn convert_int2_rtz(half2);\n" |
| 32557 | "int2 __ovld __cnfn convert_int2_sat(half2);\n" |
| 32558 | "int2 __ovld __cnfn convert_int2_sat_rte(half2);\n" |
| 32559 | "int2 __ovld __cnfn convert_int2_sat_rtp(half2);\n" |
| 32560 | "int2 __ovld __cnfn convert_int2_sat_rtn(half2);\n" |
| 32561 | "int2 __ovld __cnfn convert_int2_sat_rtz(half2);\n" |
| 32562 | "int3 __ovld __cnfn convert_int3(half3);\n" |
| 32563 | "int3 __ovld __cnfn convert_int3_rte(half3);\n" |
| 32564 | "int3 __ovld __cnfn convert_int3_rtp(half3);\n" |
| 32565 | "int3 __ovld __cnfn convert_int3_rtn(half3);\n" |
| 32566 | "int3 __ovld __cnfn convert_int3_rtz(half3);\n" |
| 32567 | "int3 __ovld __cnfn convert_int3_sat(half3);\n" |
| 32568 | "int3 __ovld __cnfn convert_int3_sat_rte(half3);\n" |
| 32569 | "int3 __ovld __cnfn convert_int3_sat_rtp(half3);\n" |
| 32570 | "int3 __ovld __cnfn convert_int3_sat_rtn(half3);\n" |
| 32571 | "int3 __ovld __cnfn convert_int3_sat_rtz(half3);\n" |
| 32572 | "int4 __ovld __cnfn convert_int4(half4);\n" |
| 32573 | "int4 __ovld __cnfn convert_int4_rte(half4);\n" |
| 32574 | "int4 __ovld __cnfn convert_int4_rtp(half4);\n" |
| 32575 | "int4 __ovld __cnfn convert_int4_rtn(half4);\n" |
| 32576 | "int4 __ovld __cnfn convert_int4_rtz(half4);\n" |
| 32577 | "int4 __ovld __cnfn convert_int4_sat(half4);\n" |
| 32578 | "int4 __ovld __cnfn convert_int4_sat_rte(half4);\n" |
| 32579 | "int4 __ovld __cnfn convert_int4_sat_rtp(half4);\n" |
| 32580 | "int4 __ovld __cnfn convert_int4_sat_rtn(half4);\n" |
| 32581 | "int4 __ovld __cnfn convert_int4_sat_rtz(half4);\n" |
| 32582 | "int8 __ovld __cnfn convert_int8(half8);\n" |
| 32583 | "int8 __ovld __cnfn convert_int8_rte(half8);\n" |
| 32584 | "int8 __ovld __cnfn convert_int8_rtp(half8);\n" |
| 32585 | "int8 __ovld __cnfn convert_int8_rtn(half8);\n" |
| 32586 | "int8 __ovld __cnfn convert_int8_rtz(half8);\n" |
| 32587 | "int8 __ovld __cnfn convert_int8_sat(half8);\n" |
| 32588 | "int8 __ovld __cnfn convert_int8_sat_rte(half8);\n" |
| 32589 | "int8 __ovld __cnfn convert_int8_sat_rtp(half8);\n" |
| 32590 | "int8 __ovld __cnfn convert_int8_sat_rtn(half8);\n" |
| 32591 | "int8 __ovld __cnfn convert_int8_sat_rtz(half8);\n" |
| 32592 | "int16 __ovld __cnfn convert_int16(half16);\n" |
| 32593 | "int16 __ovld __cnfn convert_int16_rte(half16);\n" |
| 32594 | "int16 __ovld __cnfn convert_int16_rtp(half16);\n" |
| 32595 | "int16 __ovld __cnfn convert_int16_rtn(half16);\n" |
| 32596 | "int16 __ovld __cnfn convert_int16_rtz(half16);\n" |
| 32597 | "int16 __ovld __cnfn convert_int16_sat(half16);\n" |
| 32598 | "int16 __ovld __cnfn convert_int16_sat_rte(half16);\n" |
| 32599 | "int16 __ovld __cnfn convert_int16_sat_rtp(half16);\n" |
| 32600 | "int16 __ovld __cnfn convert_int16_sat_rtn(half16);\n" |
| 32601 | "int16 __ovld __cnfn convert_int16_sat_rtz(half16);\n" |
| 32602 | "long __ovld __cnfn convert_long(half);\n" |
| 32603 | "long __ovld __cnfn convert_long_rte(half);\n" |
| 32604 | "long __ovld __cnfn convert_long_rtp(half);\n" |
| 32605 | "long __ovld __cnfn convert_long_rtn(half);\n" |
| 32606 | "long __ovld __cnfn convert_long_rtz(half);\n" |
| 32607 | "long __ovld __cnfn convert_long_sat(half);\n" |
| 32608 | "long __ovld __cnfn convert_long_sat_rte(half);\n" |
| 32609 | "long __ovld __cnfn convert_long_sat_rtp(half);\n" |
| 32610 | "long __ovld __cnfn convert_long_sat_rtn(half);\n" |
| 32611 | "long __ovld __cnfn convert_long_sat_rtz(half);\n" |
| 32612 | "long2 __ovld __cnfn convert_long2(half2);\n" |
| 32613 | "long2 __ovld __cnfn convert_long2_rte(half2);\n" |
| 32614 | "long2 __ovld __cnfn convert_long2_rtp(half2);\n" |
| 32615 | "long2 __ovld __cnfn convert_long2_rtn(half2);\n" |
| 32616 | "long2 __ovld __cnfn convert_long2_rtz(half2);\n" |
| 32617 | "long2 __ovld __cnfn convert_long2_sat(half2);\n" |
| 32618 | "long2 __ovld __cnfn convert_long2_sat_rte(half2);\n" |
| 32619 | "long2 __ovld __cnfn convert_long2_sat_rtp(half2);\n" |
| 32620 | "long2 __ovld __cnfn convert_long2_sat_rtn(half2);\n" |
| 32621 | "long2 __ovld __cnfn convert_long2_sat_rtz(half2);\n" |
| 32622 | "long3 __ovld __cnfn convert_long3(half3);\n" |
| 32623 | "long3 __ovld __cnfn convert_long3_rte(half3);\n" |
| 32624 | "long3 __ovld __cnfn convert_long3_rtp(half3);\n" |
| 32625 | "long3 __ovld __cnfn convert_long3_rtn(half3);\n" |
| 32626 | "long3 __ovld __cnfn convert_long3_rtz(half3);\n" |
| 32627 | "long3 __ovld __cnfn convert_long3_sat(half3);\n" |
| 32628 | "long3 __ovld __cnfn convert_long3_sat_rte(half3);\n" |
| 32629 | "long3 __ovld __cnfn convert_long3_sat_rtp(half3);\n" |
| 32630 | "long3 __ovld __cnfn convert_long3_sat_rtn(half3);\n" |
| 32631 | "long3 __ovld __cnfn convert_long3_sat_rtz(half3);\n" |
| 32632 | "long4 __ovld __cnfn convert_long4(half4);\n" |
| 32633 | "long4 __ovld __cnfn convert_long4_rte(half4);\n" |
| 32634 | "long4 __ovld __cnfn convert_long4_rtp(half4);\n" |
| 32635 | "long4 __ovld __cnfn convert_long4_rtn(half4);\n" |
| 32636 | "long4 __ovld __cnfn convert_long4_rtz(half4);\n" |
| 32637 | "long4 __ovld __cnfn convert_long4_sat(half4);\n" |
| 32638 | "long4 __ovld __cnfn convert_long4_sat_rte(half4);\n" |
| 32639 | "long4 __ovld __cnfn convert_long4_sat_rtp(half4);\n" |
| 32640 | "long4 __ovld __cnfn convert_long4_sat_rtn(half4);\n" |
| 32641 | "long4 __ovld __cnfn convert_long4_sat_rtz(half4);\n" |
| 32642 | "long8 __ovld __cnfn convert_long8(half8);\n" |
| 32643 | "long8 __ovld __cnfn convert_long8_rte(half8);\n" |
| 32644 | "long8 __ovld __cnfn convert_long8_rtp(half8);\n" |
| 32645 | "long8 __ovld __cnfn convert_long8_rtn(half8);\n" |
| 32646 | "long8 __ovld __cnfn convert_long8_rtz(half8);\n" |
| 32647 | "long8 __ovld __cnfn convert_long8_sat(half8);\n" |
| 32648 | "long8 __ovld __cnfn convert_long8_sat_rte(half8);\n" |
| 32649 | "long8 __ovld __cnfn convert_long8_sat_rtp(half8);\n" |
| 32650 | "long8 __ovld __cnfn convert_long8_sat_rtn(half8);\n" |
| 32651 | "long8 __ovld __cnfn convert_long8_sat_rtz(half8);\n" |
| 32652 | "long16 __ovld __cnfn convert_long16(half16);\n" |
| 32653 | "long16 __ovld __cnfn convert_long16_rte(half16);\n" |
| 32654 | "long16 __ovld __cnfn convert_long16_rtp(half16);\n" |
| 32655 | "long16 __ovld __cnfn convert_long16_rtn(half16);\n" |
| 32656 | "long16 __ovld __cnfn convert_long16_rtz(half16);\n" |
| 32657 | "long16 __ovld __cnfn convert_long16_sat(half16);\n" |
| 32658 | "long16 __ovld __cnfn convert_long16_sat_rte(half16);\n" |
| 32659 | "long16 __ovld __cnfn convert_long16_sat_rtp(half16);\n" |
| 32660 | "long16 __ovld __cnfn convert_long16_sat_rtn(half16);\n" |
| 32661 | "long16 __ovld __cnfn convert_long16_sat_rtz(half16);\n" |
| 32662 | "float __ovld __cnfn convert_float(half);\n" |
| 32663 | "float __ovld __cnfn convert_float_rte(half);\n" |
| 32664 | "float __ovld __cnfn convert_float_rtp(half);\n" |
| 32665 | "float __ovld __cnfn convert_float_rtn(half);\n" |
| 32666 | "float __ovld __cnfn convert_float_rtz(half);\n" |
| 32667 | "float2 __ovld __cnfn convert_float2(half2);\n" |
| 32668 | "float2 __ovld __cnfn convert_float2_rte(half2);\n" |
| 32669 | "float2 __ovld __cnfn convert_float2_rtp(half2);\n" |
| 32670 | "float2 __ovld __cnfn convert_float2_rtn(half2);\n" |
| 32671 | "float2 __ovld __cnfn convert_float2_rtz(half2);\n" |
| 32672 | "float3 __ovld __cnfn convert_float3(half3);\n" |
| 32673 | "float3 __ovld __cnfn convert_float3_rte(half3);\n" |
| 32674 | "float3 __ovld __cnfn convert_float3_rtp(half3);\n" |
| 32675 | "float3 __ovld __cnfn convert_float3_rtn(half3);\n" |
| 32676 | "float3 __ovld __cnfn convert_float3_rtz(half3);\n" |
| 32677 | "float4 __ovld __cnfn convert_float4(half4);\n" |
| 32678 | "float4 __ovld __cnfn convert_float4_rte(half4);\n" |
| 32679 | "float4 __ovld __cnfn convert_float4_rtp(half4);\n" |
| 32680 | "float4 __ovld __cnfn convert_float4_rtn(half4);\n" |
| 32681 | "float4 __ovld __cnfn convert_float4_rtz(half4);\n" |
| 32682 | "float8 __ovld __cnfn convert_float8(half8);\n" |
| 32683 | "float8 __ovld __cnfn convert_float8_rte(half8);\n" |
| 32684 | "float8 __ovld __cnfn convert_float8_rtp(half8);\n" |
| 32685 | "float8 __ovld __cnfn convert_float8_rtn(half8);\n" |
| 32686 | "float8 __ovld __cnfn convert_float8_rtz(half8);\n" |
| 32687 | "float16 __ovld __cnfn convert_float16(half16);\n" |
| 32688 | "float16 __ovld __cnfn convert_float16_rte(half16);\n" |
| 32689 | "float16 __ovld __cnfn convert_float16_rtp(half16);\n" |
| 32690 | "float16 __ovld __cnfn convert_float16_rtn(half16);\n" |
| 32691 | "float16 __ovld __cnfn convert_float16_rtz(half16);\n" |
| 32692 | "\n" |
| 32693 | "// Convert non-double types to half types.\n" |
| 32694 | "half __ovld __cnfn convert_half(uchar);\n" |
| 32695 | "half __ovld __cnfn convert_half(ushort);\n" |
| 32696 | "half __ovld __cnfn convert_half(uint);\n" |
| 32697 | "half __ovld __cnfn convert_half(ulong);\n" |
| 32698 | "half __ovld __cnfn convert_half(char);\n" |
| 32699 | "half __ovld __cnfn convert_half(short);\n" |
| 32700 | "half __ovld __cnfn convert_half(int);\n" |
| 32701 | "half __ovld __cnfn convert_half(long);\n" |
| 32702 | "half __ovld __cnfn convert_half(float);\n" |
| 32703 | "half __ovld __cnfn convert_half(half);\n" |
| 32704 | "half __ovld __cnfn convert_half_rte(uchar);\n" |
| 32705 | "half __ovld __cnfn convert_half_rte(ushort);\n" |
| 32706 | "half __ovld __cnfn convert_half_rte(uint);\n" |
| 32707 | "half __ovld __cnfn convert_half_rte(ulong);\n" |
| 32708 | "half __ovld __cnfn convert_half_rte(char);\n" |
| 32709 | "half __ovld __cnfn convert_half_rte(short);\n" |
| 32710 | "half __ovld __cnfn convert_half_rte(int);\n" |
| 32711 | "half __ovld __cnfn convert_half_rte(long);\n" |
| 32712 | "half __ovld __cnfn convert_half_rte(float);\n" |
| 32713 | "half __ovld __cnfn convert_half_rte(half);\n" |
| 32714 | "half __ovld __cnfn convert_half_rtp(uchar);\n" |
| 32715 | "half __ovld __cnfn convert_half_rtp(ushort);\n" |
| 32716 | "half __ovld __cnfn convert_half_rtp(uint);\n" |
| 32717 | "half __ovld __cnfn convert_half_rtp(ulong);\n" |
| 32718 | "half __ovld __cnfn convert_half_rtp(char);\n" |
| 32719 | "half __ovld __cnfn convert_half_rtp(short);\n" |
| 32720 | "half __ovld __cnfn convert_half_rtp(int);\n" |
| 32721 | "half __ovld __cnfn convert_half_rtp(long);\n" |
| 32722 | "half __ovld __cnfn convert_half_rtp(float);\n" |
| 32723 | "half __ovld __cnfn convert_half_rtp(half);\n" |
| 32724 | "half __ovld __cnfn convert_half_rtn(uchar);\n" |
| 32725 | "half __ovld __cnfn convert_half_rtn(ushort);\n" |
| 32726 | "half __ovld __cnfn convert_half_rtn(uint);\n" |
| 32727 | "half __ovld __cnfn convert_half_rtn(ulong);\n" |
| 32728 | "half __ovld __cnfn convert_half_rtn(char);\n" |
| 32729 | "half __ovld __cnfn convert_half_rtn(short);\n" |
| 32730 | "half __ovld __cnfn convert_half_rtn(int);\n" |
| 32731 | "half __ovld __cnfn convert_half_rtn(long);\n" |
| 32732 | "half __ovld __cnfn convert_half_rtn(float);\n" |
| 32733 | "half __ovld __cnfn convert_half_rtn(half);\n" |
| 32734 | "half __ovld __cnfn convert_half_rtz(uchar);\n" |
| 32735 | "half __ovld __cnfn convert_half_rtz(ushort);\n" |
| 32736 | "half __ovld __cnfn convert_half_rtz(uint);\n" |
| 32737 | "half __ovld __cnfn convert_half_rtz(ulong);\n" |
| 32738 | "half __ovld __cnfn convert_half_rtz(char);\n" |
| 32739 | "half __ovld __cnfn convert_half_rtz(short);\n" |
| 32740 | "half __ovld __cnfn convert_half_rtz(int);\n" |
| 32741 | "half __ovld __cnfn convert_half_rtz(long);\n" |
| 32742 | "half __ovld __cnfn convert_half_rtz(float);\n" |
| 32743 | "half __ovld __cnfn convert_half_rtz(half);\n" |
| 32744 | "half2 __ovld __cnfn convert_half2(char2);\n" |
| 32745 | "half2 __ovld __cnfn convert_half2(uchar2);\n" |
| 32746 | "half2 __ovld __cnfn convert_half2(short2);\n" |
| 32747 | "half2 __ovld __cnfn convert_half2(ushort2);\n" |
| 32748 | "half2 __ovld __cnfn convert_half2(int2);\n" |
| 32749 | "half2 __ovld __cnfn convert_half2(uint2);\n" |
| 32750 | "half2 __ovld __cnfn convert_half2(long2);\n" |
| 32751 | "half2 __ovld __cnfn convert_half2(ulong2);\n" |
| 32752 | "half2 __ovld __cnfn convert_half2(float2);\n" |
| 32753 | "half2 __ovld __cnfn convert_half2(half2);\n" |
| 32754 | "half2 __ovld __cnfn convert_half2_rte(char2);\n" |
| 32755 | "half2 __ovld __cnfn convert_half2_rte(uchar2);\n" |
| 32756 | "half2 __ovld __cnfn convert_half2_rte(short2);\n" |
| 32757 | "half2 __ovld __cnfn convert_half2_rte(ushort2);\n" |
| 32758 | "half2 __ovld __cnfn convert_half2_rte(int2);\n" |
| 32759 | "half2 __ovld __cnfn convert_half2_rte(uint2);\n" |
| 32760 | "half2 __ovld __cnfn convert_half2_rte(long2);\n" |
| 32761 | "half2 __ovld __cnfn convert_half2_rte(ulong2);\n" |
| 32762 | "half2 __ovld __cnfn convert_half2_rte(float2);\n" |
| 32763 | "half2 __ovld __cnfn convert_half2_rte(half2);\n" |
| 32764 | "half2 __ovld __cnfn convert_half2_rtp(char2);\n" |
| 32765 | "half2 __ovld __cnfn convert_half2_rtp(uchar2);\n" |
| 32766 | "half2 __ovld __cnfn convert_half2_rtp(short2);\n" |
| 32767 | "half2 __ovld __cnfn convert_half2_rtp(ushort2);\n" |
| 32768 | "half2 __ovld __cnfn convert_half2_rtp(int2);\n" |
| 32769 | "half2 __ovld __cnfn convert_half2_rtp(uint2);\n" |
| 32770 | "half2 __ovld __cnfn convert_half2_rtp(long2);\n" |
| 32771 | "half2 __ovld __cnfn convert_half2_rtp(ulong2);\n" |
| 32772 | "half2 __ovld __cnfn convert_half2_rtp(float2);\n" |
| 32773 | "half2 __ovld __cnfn convert_half2_rtp(half2);\n" |
| 32774 | "half2 __ovld __cnfn convert_half2_rtn(char2);\n" |
| 32775 | "half2 __ovld __cnfn convert_half2_rtn(uchar2);\n" |
| 32776 | "half2 __ovld __cnfn convert_half2_rtn(short2);\n" |
| 32777 | "half2 __ovld __cnfn convert_half2_rtn(ushort2);\n" |
| 32778 | "half2 __ovld __cnfn convert_half2_rtn(int2);\n" |
| 32779 | "half2 __ovld __cnfn convert_half2_rtn(uint2);\n" |
| 32780 | "half2 __ovld __cnfn convert_half2_rtn(long2);\n" |
| 32781 | "half2 __ovld __cnfn convert_half2_rtn(ulong2);\n" |
| 32782 | "half2 __ovld __cnfn convert_half2_rtn(float2);\n" |
| 32783 | "half2 __ovld __cnfn convert_half2_rtn(half2);\n" |
| 32784 | "half2 __ovld __cnfn convert_half2_rtz(char2);\n" |
| 32785 | "half2 __ovld __cnfn convert_half2_rtz(uchar2);\n" |
| 32786 | "half2 __ovld __cnfn convert_half2_rtz(short2);\n" |
| 32787 | "half2 __ovld __cnfn convert_half2_rtz(ushort2);\n" |
| 32788 | "half2 __ovld __cnfn convert_half2_rtz(int2);\n" |
| 32789 | "half2 __ovld __cnfn convert_half2_rtz(uint2);\n" |
| 32790 | "half2 __ovld __cnfn convert_half2_rtz(long2);\n" |
| 32791 | "half2 __ovld __cnfn convert_half2_rtz(ulong2);\n" |
| 32792 | "half2 __ovld __cnfn convert_half2_rtz(float2);\n" |
| 32793 | "half2 __ovld __cnfn convert_half2_rtz(half2);\n" |
| 32794 | "half3 __ovld __cnfn convert_half3(char3);\n" |
| 32795 | "half3 __ovld __cnfn convert_half3(uchar3);\n" |
| 32796 | "half3 __ovld __cnfn convert_half3(short3);\n" |
| 32797 | "half3 __ovld __cnfn convert_half3(ushort3);\n" |
| 32798 | "half3 __ovld __cnfn convert_half3(int3);\n" |
| 32799 | "half3 __ovld __cnfn convert_half3(uint3);\n" |
| 32800 | "half3 __ovld __cnfn convert_half3(long3);\n" |
| 32801 | "half3 __ovld __cnfn convert_half3(ulong3);\n" |
| 32802 | "half3 __ovld __cnfn convert_half3(float3);\n" |
| 32803 | "half3 __ovld __cnfn convert_half3(half3);\n" |
| 32804 | "half3 __ovld __cnfn convert_half3_rte(char3);\n" |
| 32805 | "half3 __ovld __cnfn convert_half3_rte(uchar3);\n" |
| 32806 | "half3 __ovld __cnfn convert_half3_rte(short3);\n" |
| 32807 | "half3 __ovld __cnfn convert_half3_rte(ushort3);\n" |
| 32808 | "half3 __ovld __cnfn convert_half3_rte(int3);\n" |
| 32809 | "half3 __ovld __cnfn convert_half3_rte(uint3);\n" |
| 32810 | "half3 __ovld __cnfn convert_half3_rte(long3);\n" |
| 32811 | "half3 __ovld __cnfn convert_half3_rte(ulong3);\n" |
| 32812 | "half3 __ovld __cnfn convert_half3_rte(float3);\n" |
| 32813 | "half3 __ovld __cnfn convert_half3_rte(half3);\n" |
| 32814 | "half3 __ovld __cnfn convert_half3_rtp(char3);\n" |
| 32815 | "half3 __ovld __cnfn convert_half3_rtp(uchar3);\n" |
| 32816 | "half3 __ovld __cnfn convert_half3_rtp(short3);\n" |
| 32817 | "half3 __ovld __cnfn convert_half3_rtp(ushort3);\n" |
| 32818 | "half3 __ovld __cnfn convert_half3_rtp(int3);\n" |
| 32819 | "half3 __ovld __cnfn convert_half3_rtp(uint3);\n" |
| 32820 | "half3 __ovld __cnfn convert_half3_rtp(long3);\n" |
| 32821 | "half3 __ovld __cnfn convert_half3_rtp(ulong3);\n" |
| 32822 | "half3 __ovld __cnfn convert_half3_rtp(float3);\n" |
| 32823 | "half3 __ovld __cnfn convert_half3_rtp(half3);\n" |
| 32824 | "half3 __ovld __cnfn convert_half3_rtn(char3);\n" |
| 32825 | "half3 __ovld __cnfn convert_half3_rtn(uchar3);\n" |
| 32826 | "half3 __ovld __cnfn convert_half3_rtn(short3);\n" |
| 32827 | "half3 __ovld __cnfn convert_half3_rtn(ushort3);\n" |
| 32828 | "half3 __ovld __cnfn convert_half3_rtn(int3);\n" |
| 32829 | "half3 __ovld __cnfn convert_half3_rtn(uint3);\n" |
| 32830 | "half3 __ovld __cnfn convert_half3_rtn(long3);\n" |
| 32831 | "half3 __ovld __cnfn convert_half3_rtn(ulong3);\n" |
| 32832 | "half3 __ovld __cnfn convert_half3_rtn(float3);\n" |
| 32833 | "half3 __ovld __cnfn convert_half3_rtn(half3);\n" |
| 32834 | "half3 __ovld __cnfn convert_half3_rtz(char3);\n" |
| 32835 | "half3 __ovld __cnfn convert_half3_rtz(uchar3);\n" |
| 32836 | "half3 __ovld __cnfn convert_half3_rtz(short3);\n" |
| 32837 | "half3 __ovld __cnfn convert_half3_rtz(ushort3);\n" |
| 32838 | "half3 __ovld __cnfn convert_half3_rtz(int3);\n" |
| 32839 | "half3 __ovld __cnfn convert_half3_rtz(uint3);\n" |
| 32840 | "half3 __ovld __cnfn convert_half3_rtz(long3);\n" |
| 32841 | "half3 __ovld __cnfn convert_half3_rtz(ulong3);\n" |
| 32842 | "half3 __ovld __cnfn convert_half3_rtz(float3);\n" |
| 32843 | "half3 __ovld __cnfn convert_half3_rtz(half3);\n" |
| 32844 | "half4 __ovld __cnfn convert_half4(char4);\n" |
| 32845 | "half4 __ovld __cnfn convert_half4(uchar4);\n" |
| 32846 | "half4 __ovld __cnfn convert_half4(short4);\n" |
| 32847 | "half4 __ovld __cnfn convert_half4(ushort4);\n" |
| 32848 | "half4 __ovld __cnfn convert_half4(int4);\n" |
| 32849 | "half4 __ovld __cnfn convert_half4(uint4);\n" |
| 32850 | "half4 __ovld __cnfn convert_half4(long4);\n" |
| 32851 | "half4 __ovld __cnfn convert_half4(ulong4);\n" |
| 32852 | "half4 __ovld __cnfn convert_half4(float4);\n" |
| 32853 | "half4 __ovld __cnfn convert_half4(half4);\n" |
| 32854 | "half4 __ovld __cnfn convert_half4_rte(char4);\n" |
| 32855 | "half4 __ovld __cnfn convert_half4_rte(uchar4);\n" |
| 32856 | "half4 __ovld __cnfn convert_half4_rte(short4);\n" |
| 32857 | "half4 __ovld __cnfn convert_half4_rte(ushort4);\n" |
| 32858 | "half4 __ovld __cnfn convert_half4_rte(int4);\n" |
| 32859 | "half4 __ovld __cnfn convert_half4_rte(uint4);\n" |
| 32860 | "half4 __ovld __cnfn convert_half4_rte(long4);\n" |
| 32861 | "half4 __ovld __cnfn convert_half4_rte(ulong4);\n" |
| 32862 | "half4 __ovld __cnfn convert_half4_rte(float4);\n" |
| 32863 | "half4 __ovld __cnfn convert_half4_rte(half4);\n" |
| 32864 | "half4 __ovld __cnfn convert_half4_rtp(char4);\n" |
| 32865 | "half4 __ovld __cnfn convert_half4_rtp(uchar4);\n" |
| 32866 | "half4 __ovld __cnfn convert_half4_rtp(short4);\n" |
| 32867 | "half4 __ovld __cnfn convert_half4_rtp(ushort4);\n" |
| 32868 | "half4 __ovld __cnfn convert_half4_rtp(int4);\n" |
| 32869 | "half4 __ovld __cnfn convert_half4_rtp(uint4);\n" |
| 32870 | "half4 __ovld __cnfn convert_half4_rtp(long4);\n" |
| 32871 | "half4 __ovld __cnfn convert_half4_rtp(ulong4);\n" |
| 32872 | "half4 __ovld __cnfn convert_half4_rtp(float4);\n" |
| 32873 | "half4 __ovld __cnfn convert_half4_rtp(half4);\n" |
| 32874 | "half4 __ovld __cnfn convert_half4_rtn(char4);\n" |
| 32875 | "half4 __ovld __cnfn convert_half4_rtn(uchar4);\n" |
| 32876 | "half4 __ovld __cnfn convert_half4_rtn(short4);\n" |
| 32877 | "half4 __ovld __cnfn convert_half4_rtn(ushort4);\n" |
| 32878 | "half4 __ovld __cnfn convert_half4_rtn(int4);\n" |
| 32879 | "half4 __ovld __cnfn convert_half4_rtn(uint4);\n" |
| 32880 | "half4 __ovld __cnfn convert_half4_rtn(long4);\n" |
| 32881 | "half4 __ovld __cnfn convert_half4_rtn(ulong4);\n" |
| 32882 | "half4 __ovld __cnfn convert_half4_rtn(float4);\n" |
| 32883 | "half4 __ovld __cnfn convert_half4_rtn(half4);\n" |
| 32884 | "half4 __ovld __cnfn convert_half4_rtz(char4);\n" |
| 32885 | "half4 __ovld __cnfn convert_half4_rtz(uchar4);\n" |
| 32886 | "half4 __ovld __cnfn convert_half4_rtz(short4);\n" |
| 32887 | "half4 __ovld __cnfn convert_half4_rtz(ushort4);\n" |
| 32888 | "half4 __ovld __cnfn convert_half4_rtz(int4);\n" |
| 32889 | "half4 __ovld __cnfn convert_half4_rtz(uint4);\n" |
| 32890 | "half4 __ovld __cnfn convert_half4_rtz(long4);\n" |
| 32891 | "half4 __ovld __cnfn convert_half4_rtz(ulong4);\n" |
| 32892 | "half4 __ovld __cnfn convert_half4_rtz(float4);\n" |
| 32893 | "half4 __ovld __cnfn convert_half4_rtz(half4);\n" |
| 32894 | "half8 __ovld __cnfn convert_half8(char8);\n" |
| 32895 | "half8 __ovld __cnfn convert_half8(uchar8);\n" |
| 32896 | "half8 __ovld __cnfn convert_half8(short8);\n" |
| 32897 | "half8 __ovld __cnfn convert_half8(ushort8);\n" |
| 32898 | "half8 __ovld __cnfn convert_half8(int8);\n" |
| 32899 | "half8 __ovld __cnfn convert_half8(uint8);\n" |
| 32900 | "half8 __ovld __cnfn convert_half8(long8);\n" |
| 32901 | "half8 __ovld __cnfn convert_half8(ulong8);\n" |
| 32902 | "half8 __ovld __cnfn convert_half8(float8);\n" |
| 32903 | "half8 __ovld __cnfn convert_half8(half8);\n" |
| 32904 | "half8 __ovld __cnfn convert_half8_rte(char8);\n" |
| 32905 | "half8 __ovld __cnfn convert_half8_rte(uchar8);\n" |
| 32906 | "half8 __ovld __cnfn convert_half8_rte(short8);\n" |
| 32907 | "half8 __ovld __cnfn convert_half8_rte(ushort8);\n" |
| 32908 | "half8 __ovld __cnfn convert_half8_rte(int8);\n" |
| 32909 | "half8 __ovld __cnfn convert_half8_rte(uint8);\n" |
| 32910 | "half8 __ovld __cnfn convert_half8_rte(long8);\n" |
| 32911 | "half8 __ovld __cnfn convert_half8_rte(ulong8);\n" |
| 32912 | "half8 __ovld __cnfn convert_half8_rte(float8);\n" |
| 32913 | "half8 __ovld __cnfn convert_half8_rte(half8);\n" |
| 32914 | "half8 __ovld __cnfn convert_half8_rtp(char8);\n" |
| 32915 | "half8 __ovld __cnfn convert_half8_rtp(uchar8);\n" |
| 32916 | "half8 __ovld __cnfn convert_half8_rtp(short8);\n" |
| 32917 | "half8 __ovld __cnfn convert_half8_rtp(ushort8);\n" |
| 32918 | "half8 __ovld __cnfn convert_half8_rtp(int8);\n" |
| 32919 | "half8 __ovld __cnfn convert_half8_rtp(uint8);\n" |
| 32920 | "half8 __ovld __cnfn convert_half8_rtp(long8);\n" |
| 32921 | "half8 __ovld __cnfn convert_half8_rtp(ulong8);\n" |
| 32922 | "half8 __ovld __cnfn convert_half8_rtp(float8);\n" |
| 32923 | "half8 __ovld __cnfn convert_half8_rtp(half8);\n" |
| 32924 | "half8 __ovld __cnfn convert_half8_rtn(char8);\n" |
| 32925 | "half8 __ovld __cnfn convert_half8_rtn(uchar8);\n" |
| 32926 | "half8 __ovld __cnfn convert_half8_rtn(short8);\n" |
| 32927 | "half8 __ovld __cnfn convert_half8_rtn(ushort8);\n" |
| 32928 | "half8 __ovld __cnfn convert_half8_rtn(int8);\n" |
| 32929 | "half8 __ovld __cnfn convert_half8_rtn(uint8);\n" |
| 32930 | "half8 __ovld __cnfn convert_half8_rtn(long8);\n" |
| 32931 | "half8 __ovld __cnfn convert_half8_rtn(ulong8);\n" |
| 32932 | "half8 __ovld __cnfn convert_half8_rtn(float8);\n" |
| 32933 | "half8 __ovld __cnfn convert_half8_rtn(half8);\n" |
| 32934 | "half8 __ovld __cnfn convert_half8_rtz(char8);\n" |
| 32935 | "half8 __ovld __cnfn convert_half8_rtz(uchar8);\n" |
| 32936 | "half8 __ovld __cnfn convert_half8_rtz(short8);\n" |
| 32937 | "half8 __ovld __cnfn convert_half8_rtz(ushort8);\n" |
| 32938 | "half8 __ovld __cnfn convert_half8_rtz(int8);\n" |
| 32939 | "half8 __ovld __cnfn convert_half8_rtz(uint8);\n" |
| 32940 | "half8 __ovld __cnfn convert_half8_rtz(long8);\n" |
| 32941 | "half8 __ovld __cnfn convert_half8_rtz(ulong8);\n" |
| 32942 | "half8 __ovld __cnfn convert_half8_rtz(float8);\n" |
| 32943 | "half8 __ovld __cnfn convert_half8_rtz(half8);\n" |
| 32944 | "half16 __ovld __cnfn convert_half16(char16);\n" |
| 32945 | "half16 __ovld __cnfn convert_half16(uchar16);\n" |
| 32946 | "half16 __ovld __cnfn convert_half16(short16);\n" |
| 32947 | "half16 __ovld __cnfn convert_half16(ushort16);\n" |
| 32948 | "half16 __ovld __cnfn convert_half16(int16);\n" |
| 32949 | "half16 __ovld __cnfn convert_half16(uint16);\n" |
| 32950 | "half16 __ovld __cnfn convert_half16(long16);\n" |
| 32951 | "half16 __ovld __cnfn convert_half16(ulong16);\n" |
| 32952 | "half16 __ovld __cnfn convert_half16(float16);\n" |
| 32953 | "half16 __ovld __cnfn convert_half16(half16);\n" |
| 32954 | "half16 __ovld __cnfn convert_half16_rte(char16);\n" |
| 32955 | "half16 __ovld __cnfn convert_half16_rte(uchar16);\n" |
| 32956 | "half16 __ovld __cnfn convert_half16_rte(short16);\n" |
| 32957 | "half16 __ovld __cnfn convert_half16_rte(ushort16);\n" |
| 32958 | "half16 __ovld __cnfn convert_half16_rte(int16);\n" |
| 32959 | "half16 __ovld __cnfn convert_half16_rte(uint16);\n" |
| 32960 | "half16 __ovld __cnfn convert_half16_rte(long16);\n" |
| 32961 | "half16 __ovld __cnfn convert_half16_rte(ulong16);\n" |
| 32962 | "half16 __ovld __cnfn convert_half16_rte(float16);\n" |
| 32963 | "half16 __ovld __cnfn convert_half16_rte(half16);\n" |
| 32964 | "half16 __ovld __cnfn convert_half16_rtp(char16);\n" |
| 32965 | "half16 __ovld __cnfn convert_half16_rtp(uchar16);\n" |
| 32966 | "half16 __ovld __cnfn convert_half16_rtp(short16);\n" |
| 32967 | "half16 __ovld __cnfn convert_half16_rtp(ushort16);\n" |
| 32968 | "half16 __ovld __cnfn convert_half16_rtp(int16);\n" |
| 32969 | "half16 __ovld __cnfn convert_half16_rtp(uint16);\n" |
| 32970 | "half16 __ovld __cnfn convert_half16_rtp(long16);\n" |
| 32971 | "half16 __ovld __cnfn convert_half16_rtp(ulong16);\n" |
| 32972 | "half16 __ovld __cnfn convert_half16_rtp(float16);\n" |
| 32973 | "half16 __ovld __cnfn convert_half16_rtp(half16);\n" |
| 32974 | "half16 __ovld __cnfn convert_half16_rtn(char16);\n" |
| 32975 | "half16 __ovld __cnfn convert_half16_rtn(uchar16);\n" |
| 32976 | "half16 __ovld __cnfn convert_half16_rtn(short16);\n" |
| 32977 | "half16 __ovld __cnfn convert_half16_rtn(ushort16);\n" |
| 32978 | "half16 __ovld __cnfn convert_half16_rtn(int16);\n" |
| 32979 | "half16 __ovld __cnfn convert_half16_rtn(uint16);\n" |
| 32980 | "half16 __ovld __cnfn convert_half16_rtn(long16);\n" |
| 32981 | "half16 __ovld __cnfn convert_half16_rtn(ulong16);\n" |
| 32982 | "half16 __ovld __cnfn convert_half16_rtn(float16);\n" |
| 32983 | "half16 __ovld __cnfn convert_half16_rtn(half16);\n" |
| 32984 | "half16 __ovld __cnfn convert_half16_rtz(char16);\n" |
| 32985 | "half16 __ovld __cnfn convert_half16_rtz(uchar16);\n" |
| 32986 | "half16 __ovld __cnfn convert_half16_rtz(short16);\n" |
| 32987 | "half16 __ovld __cnfn convert_half16_rtz(ushort16);\n" |
| 32988 | "half16 __ovld __cnfn convert_half16_rtz(int16);\n" |
| 32989 | "half16 __ovld __cnfn convert_half16_rtz(uint16);\n" |
| 32990 | "half16 __ovld __cnfn convert_half16_rtz(long16);\n" |
| 32991 | "half16 __ovld __cnfn convert_half16_rtz(ulong16);\n" |
| 32992 | "half16 __ovld __cnfn convert_half16_rtz(float16);\n" |
| 32993 | "half16 __ovld __cnfn convert_half16_rtz(half16);\n" |
| 32994 | "\n" |
| 32995 | "// Convert half types to double types.\n" |
| 32996 | "#ifdef cl_khr_fp64\n" |
| 32997 | "double __ovld __cnfn convert_double(half);\n" |
| 32998 | "double __ovld __cnfn convert_double_rte(half);\n" |
| 32999 | "double __ovld __cnfn convert_double_rtp(half);\n" |
| 33000 | "double __ovld __cnfn convert_double_rtn(half);\n" |
| 33001 | "double __ovld __cnfn convert_double_rtz(half);\n" |
| 33002 | "double2 __ovld __cnfn convert_double2(half2);\n" |
| 33003 | "double2 __ovld __cnfn convert_double2_rte(half2);\n" |
| 33004 | "double2 __ovld __cnfn convert_double2_rtp(half2);\n" |
| 33005 | "double2 __ovld __cnfn convert_double2_rtn(half2);\n" |
| 33006 | "double2 __ovld __cnfn convert_double2_rtz(half2);\n" |
| 33007 | "double3 __ovld __cnfn convert_double3(half3);\n" |
| 33008 | "double3 __ovld __cnfn convert_double3_rte(half3);\n" |
| 33009 | "double3 __ovld __cnfn convert_double3_rtp(half3);\n" |
| 33010 | "double3 __ovld __cnfn convert_double3_rtn(half3);\n" |
| 33011 | "double3 __ovld __cnfn convert_double3_rtz(half3);\n" |
| 33012 | "double4 __ovld __cnfn convert_double4(half4);\n" |
| 33013 | "double4 __ovld __cnfn convert_double4_rte(half4);\n" |
| 33014 | "double4 __ovld __cnfn convert_double4_rtp(half4);\n" |
| 33015 | "double4 __ovld __cnfn convert_double4_rtn(half4);\n" |
| 33016 | "double4 __ovld __cnfn convert_double4_rtz(half4);\n" |
| 33017 | "double8 __ovld __cnfn convert_double8(half8);\n" |
| 33018 | "double8 __ovld __cnfn convert_double8_rte(half8);\n" |
| 33019 | "double8 __ovld __cnfn convert_double8_rtp(half8);\n" |
| 33020 | "double8 __ovld __cnfn convert_double8_rtn(half8);\n" |
| 33021 | "double8 __ovld __cnfn convert_double8_rtz(half8);\n" |
| 33022 | "double16 __ovld __cnfn convert_double16(half16);\n" |
| 33023 | "double16 __ovld __cnfn convert_double16_rte(half16);\n" |
| 33024 | "double16 __ovld __cnfn convert_double16_rtp(half16);\n" |
| 33025 | "double16 __ovld __cnfn convert_double16_rtn(half16);\n" |
| 33026 | "double16 __ovld __cnfn convert_double16_rtz(half16);\n" |
| 33027 | "\n" |
| 33028 | "// Convert double types to half types.\n" |
| 33029 | "half __ovld __cnfn convert_half(double);\n" |
| 33030 | "half __ovld __cnfn convert_half_rte(double);\n" |
| 33031 | "half __ovld __cnfn convert_half_rtp(double);\n" |
| 33032 | "half __ovld __cnfn convert_half_rtn(double);\n" |
| 33033 | "half __ovld __cnfn convert_half_rtz(double);\n" |
| 33034 | "half2 __ovld __cnfn convert_half2(double2);\n" |
| 33035 | "half2 __ovld __cnfn convert_half2_rte(double2);\n" |
| 33036 | "half2 __ovld __cnfn convert_half2_rtp(double2);\n" |
| 33037 | "half2 __ovld __cnfn convert_half2_rtn(double2);\n" |
| 33038 | "half2 __ovld __cnfn convert_half2_rtz(double2);\n" |
| 33039 | "half3 __ovld __cnfn convert_half3(double3);\n" |
| 33040 | "half3 __ovld __cnfn convert_half3_rte(double3);\n" |
| 33041 | "half3 __ovld __cnfn convert_half3_rtp(double3);\n" |
| 33042 | "half3 __ovld __cnfn convert_half3_rtn(double3);\n" |
| 33043 | "half3 __ovld __cnfn convert_half3_rtz(double3);\n" |
| 33044 | "half4 __ovld __cnfn convert_half4(double4);\n" |
| 33045 | "half4 __ovld __cnfn convert_half4_rte(double4);\n" |
| 33046 | "half4 __ovld __cnfn convert_half4_rtp(double4);\n" |
| 33047 | "half4 __ovld __cnfn convert_half4_rtn(double4);\n" |
| 33048 | "half4 __ovld __cnfn convert_half4_rtz(double4);\n" |
| 33049 | "half8 __ovld __cnfn convert_half8(double8);\n" |
| 33050 | "half8 __ovld __cnfn convert_half8_rte(double8);\n" |
| 33051 | "half8 __ovld __cnfn convert_half8_rtp(double8);\n" |
| 33052 | "half8 __ovld __cnfn convert_half8_rtn(double8);\n" |
| 33053 | "half8 __ovld __cnfn convert_half8_rtz(double8);\n" |
| 33054 | "half16 __ovld __cnfn convert_half16(double16);\n" |
| 33055 | "half16 __ovld __cnfn convert_half16_rte(double16);\n" |
| 33056 | "half16 __ovld __cnfn convert_half16_rtp(double16);\n" |
| 33057 | "half16 __ovld __cnfn convert_half16_rtn(double16);\n" |
| 33058 | "half16 __ovld __cnfn convert_half16_rtz(double16);\n" |
| 33059 | "#endif //cl_khr_fp64\n" |
| 33060 | "\n" |
| 33061 | "#endif // cl_khr_fp16\n" |
| 33062 | "\n" |
| 33063 | "/**\n" |
| 33064 | " * OpenCL v1.1/1.2/2.0 s6.2.4.2 - as_type operators\n" |
| 33065 | " * Reinterprets a data type as another data type of the same size\n" |
| 33066 | " */\n" |
| 33067 | "#define as_char(x) __builtin_astype((x), char)\n" |
| 33068 | "#define as_char2(x) __builtin_astype((x), char2)\n" |
| 33069 | "#define as_char3(x) __builtin_astype((x), char3)\n" |
| 33070 | "#define as_char4(x) __builtin_astype((x), char4)\n" |
| 33071 | "#define as_char8(x) __builtin_astype((x), char8)\n" |
| 33072 | "#define as_char16(x) __builtin_astype((x), char16)\n" |
| 33073 | "\n" |
| 33074 | "#define as_uchar(x) __builtin_astype((x), uchar)\n" |
| 33075 | "#define as_uchar2(x) __builtin_astype((x), uchar2)\n" |
| 33076 | "#define as_uchar3(x) __builtin_astype((x), uchar3)\n" |
| 33077 | "#define as_uchar4(x) __builtin_astype((x), uchar4)\n" |
| 33078 | "#define as_uchar8(x) __builtin_astype((x), uchar8)\n" |
| 33079 | "#define as_uchar16(x) __builtin_astype((x), uchar16)\n" |
| 33080 | "\n" |
| 33081 | "#define as_short(x) __builtin_astype((x), short)\n" |
| 33082 | "#define as_short2(x) __builtin_astype((x), short2)\n" |
| 33083 | "#define as_short3(x) __builtin_astype((x), short3)\n" |
| 33084 | "#define as_short4(x) __builtin_astype((x), short4)\n" |
| 33085 | "#define as_short8(x) __builtin_astype((x), short8)\n" |
| 33086 | "#define as_short16(x) __builtin_astype((x), short16)\n" |
| 33087 | "\n" |
| 33088 | "#define as_ushort(x) __builtin_astype((x), ushort)\n" |
| 33089 | "#define as_ushort2(x) __builtin_astype((x), ushort2)\n" |
| 33090 | "#define as_ushort3(x) __builtin_astype((x), ushort3)\n" |
| 33091 | "#define as_ushort4(x) __builtin_astype((x), ushort4)\n" |
| 33092 | "#define as_ushort8(x) __builtin_astype((x), ushort8)\n" |
| 33093 | "#define as_ushort16(x) __builtin_astype((x), ushort16)\n" |
| 33094 | "\n" |
| 33095 | "#define as_int(x) __builtin_astype((x), int)\n" |
| 33096 | "#define as_int2(x) __builtin_astype((x), int2)\n" |
| 33097 | "#define as_int3(x) __builtin_astype((x), int3)\n" |
| 33098 | "#define as_int4(x) __builtin_astype((x), int4)\n" |
| 33099 | "#define as_int8(x) __builtin_astype((x), int8)\n" |
| 33100 | "#define as_int16(x) __builtin_astype((x), int16)\n" |
| 33101 | "\n" |
| 33102 | "#define as_uint(x) __builtin_astype((x), uint)\n" |
| 33103 | "#define as_uint2(x) __builtin_astype((x), uint2)\n" |
| 33104 | "#define as_uint3(x) __builtin_astype((x), uint3)\n" |
| 33105 | "#define as_uint4(x) __builtin_astype((x), uint4)\n" |
| 33106 | "#define as_uint8(x) __builtin_astype((x), uint8)\n" |
| 33107 | "#define as_uint16(x) __builtin_astype((x), uint16)\n" |
| 33108 | "\n" |
| 33109 | "#define as_long(x) __builtin_astype((x), long)\n" |
| 33110 | "#define as_long2(x) __builtin_astype((x), long2)\n" |
| 33111 | "#define as_long3(x) __builtin_astype((x), long3)\n" |
| 33112 | "#define as_long4(x) __builtin_astype((x), long4)\n" |
| 33113 | "#define as_long8(x) __builtin_astype((x), long8)\n" |
| 33114 | "#define as_long16(x) __builtin_astype((x), long16)\n" |
| 33115 | "\n" |
| 33116 | "#define as_ulong(x) __builtin_astype((x), ulong)\n" |
| 33117 | "#define as_ulong2(x) __builtin_astype((x), ulong2)\n" |
| 33118 | "#define as_ulong3(x) __builtin_astype((x), ulong3)\n" |
| 33119 | "#define as_ulong4(x) __builtin_astype((x), ulong4)\n" |
| 33120 | "#define as_ulong8(x) __builtin_astype((x), ulong8)\n" |
| 33121 | "#define as_ulong16(x) __builtin_astype((x), ulong16)\n" |
| 33122 | "\n" |
| 33123 | "#define as_float(x) __builtin_astype((x), float)\n" |
| 33124 | "#define as_float2(x) __builtin_astype((x), float2)\n" |
| 33125 | "#define as_float3(x) __builtin_astype((x), float3)\n" |
| 33126 | "#define as_float4(x) __builtin_astype((x), float4)\n" |
| 33127 | "#define as_float8(x) __builtin_astype((x), float8)\n" |
| 33128 | "#define as_float16(x) __builtin_astype((x), float16)\n" |
| 33129 | "\n" |
| 33130 | "#ifdef cl_khr_fp64\n" |
| 33131 | "#define as_double(x) __builtin_astype((x), double)\n" |
| 33132 | "#define as_double2(x) __builtin_astype((x), double2)\n" |
| 33133 | "#define as_double3(x) __builtin_astype((x), double3)\n" |
| 33134 | "#define as_double4(x) __builtin_astype((x), double4)\n" |
| 33135 | "#define as_double8(x) __builtin_astype((x), double8)\n" |
| 33136 | "#define as_double16(x) __builtin_astype((x), double16)\n" |
| 33137 | "#endif //cl_khr_fp64\n" |
| 33138 | "\n" |
| 33139 | "#ifdef cl_khr_fp16\n" |
| 33140 | "#define as_half(x) __builtin_astype((x), half)\n" |
| 33141 | "#define as_half2(x) __builtin_astype((x), half2)\n" |
| 33142 | "#define as_half3(x) __builtin_astype((x), half3)\n" |
| 33143 | "#define as_half4(x) __builtin_astype((x), half4)\n" |
| 33144 | "#define as_half8(x) __builtin_astype((x), half8)\n" |
| 33145 | "#define as_half16(x) __builtin_astype((x), half16)\n" |
| 33146 | "#endif //cl_khr_fp16\n" |
| 33147 | "\n" |
| 33148 | "// OpenCL v1.1 s6.9, v1.2/2.0 s6.10 - Function qualifiers\n" |
| 33149 | "\n" |
| 33150 | "#define __kernel_exec(X, typen) __kernel \\\n" |
| 33151 | " __attribute__((work_group_size_hint(X, 1, 1))) \\\n" |
| 33152 | " __attribute__((vec_type_hint(typen)))\n" |
| 33153 | "\n" |
| 33154 | "#define kernel_exec(X, typen) __kernel \\\n" |
| 33155 | " __attribute__((work_group_size_hint(X, 1, 1))) \\\n" |
| 33156 | " __attribute__((vec_type_hint(typen)))\n" |
| 33157 | "\n" |
| 33158 | "// OpenCL v1.1 s6.11.1, v1.2 s6.12.1, v2.0 s6.13.1 - Work-item Functions\n" |
| 33159 | "\n" |
| 33160 | "/**\n" |
| 33161 | " * Returns the number of dimensions in use. This is the\n" |
| 33162 | " * value given to the work_dim argument specified in\n" |
| 33163 | " * clEnqueueNDRangeKernel.\n" |
| 33164 | " * For clEnqueueTask, this returns 1.\n" |
| 33165 | " */\n" |
| 33166 | "uint __ovld __cnfn get_work_dim(void);\n" |
| 33167 | "\n" |
| 33168 | "/**\n" |
| 33169 | " * Returns the number of global work-items specified for\n" |
| 33170 | " * dimension identified by dimindx. This value is given by\n" |
| 33171 | " * the global_work_size argument to\n" |
| 33172 | " * clEnqueueNDRangeKernel. Valid values of dimindx\n" |
| 33173 | " * are 0 to get_work_dim() - 1. For other values of\n" |
| 33174 | " * dimindx, get_global_size() returns 1.\n" |
| 33175 | " * For clEnqueueTask, this always returns 1.\n" |
| 33176 | " */\n" |
| 33177 | "size_t __ovld __cnfn get_global_size(uint dimindx);\n" |
| 33178 | "\n" |
| 33179 | "/**\n" |
| 33180 | " * Returns the unique global work-item ID value for\n" |
| 33181 | " * dimension identified by dimindx. The global work-item\n" |
| 33182 | " * ID specifies the work-item ID based on the number of\n" |
| 33183 | " * global work-items specified to execute the kernel. Valid\n" |
| 33184 | " * values of dimindx are 0 to get_work_dim() - 1. For\n" |
| 33185 | " * other values of dimindx, get_global_id() returns 0.\n" |
| 33186 | " * For clEnqueueTask, this returns 0.\n" |
| 33187 | " */\n" |
| 33188 | "size_t __ovld __cnfn get_global_id(uint dimindx);\n" |
| 33189 | "\n" |
| 33190 | "/**\n" |
| 33191 | " * Returns the number of local work-items specified in\n" |
| 33192 | " * dimension identified by dimindx. This value is given by\n" |
| 33193 | " * the local_work_size argument to\n" |
| 33194 | " * clEnqueueNDRangeKernel if local_work_size is not\n" |
| 33195 | " * NULL; otherwise the OpenCL implementation chooses\n" |
| 33196 | " * an appropriate local_work_size value which is returned\n" |
| 33197 | " * by this function. Valid values of dimindx are 0 to\n" |
| 33198 | " * get_work_dim() - 1. For other values of dimindx,\n" |
| 33199 | " * get_local_size() returns 1.\n" |
| 33200 | " * For clEnqueueTask, this always returns 1.\n" |
| 33201 | " */\n" |
| 33202 | "size_t __ovld __cnfn get_local_size(uint dimindx);\n" |
| 33203 | "\n" |
| 33204 | "/**\n" |
| 33205 | " * Returns the unique local work-item ID i.e. a work-item\n" |
| 33206 | " * within a specific work-group for dimension identified by\n" |
| 33207 | " * dimindx. Valid values of dimindx are 0 to\n" |
| 33208 | " * get_work_dim() - 1. For other values of dimindx,\n" |
| 33209 | " * get_local_id() returns 0.\n" |
| 33210 | " * For clEnqueueTask, this returns 0.\n" |
| 33211 | " */\n" |
| 33212 | "size_t __ovld __cnfn get_local_id(uint dimindx);\n" |
| 33213 | "\n" |
| 33214 | "/**\n" |
| 33215 | " * Returns the number of work-groups that will execute a\n" |
| 33216 | " * kernel for dimension identified by dimindx.\n" |
| 33217 | " * Valid values of dimindx are 0 to get_work_dim() - 1.\n" |
| 33218 | " * For other values of dimindx, get_num_groups () returns\n" |
| 33219 | " * 1.\n" |
| 33220 | " * For clEnqueueTask, this always returns 1.\n" |
| 33221 | " */\n" |
| 33222 | "size_t __ovld __cnfn get_num_groups(uint dimindx);\n" |
| 33223 | "\n" |
| 33224 | "/**\n" |
| 33225 | " * get_group_id returns the work-group ID which is a\n" |
| 33226 | " * number from 0 .. get_num_groups(dimindx) - 1.\n" |
| 33227 | " * Valid values of dimindx are 0 to get_work_dim() - 1.\n" |
| 33228 | " * For other values, get_group_id() returns 0.\n" |
| 33229 | " * For clEnqueueTask, this returns 0.\n" |
| 33230 | " */\n" |
| 33231 | "size_t __ovld __cnfn get_group_id(uint dimindx);\n" |
| 33232 | "\n" |
| 33233 | "/**\n" |
| 33234 | " * get_global_offset returns the offset values specified in\n" |
| 33235 | " * global_work_offset argument to\n" |
| 33236 | " * clEnqueueNDRangeKernel.\n" |
| 33237 | " * Valid values of dimindx are 0 to get_work_dim() - 1.\n" |
| 33238 | " * For other values, get_global_offset() returns 0.\n" |
| 33239 | " * For clEnqueueTask, this returns 0.\n" |
| 33240 | " */\n" |
| 33241 | "size_t __ovld __cnfn get_global_offset(uint dimindx);\n" |
| 33242 | "\n" |
| 33243 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 33244 | "size_t __ovld get_enqueued_local_size(uint dimindx);\n" |
| 33245 | "size_t __ovld get_global_linear_id(void);\n" |
| 33246 | "size_t __ovld get_local_linear_id(void);\n" |
| 33247 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 33248 | "\n" |
| 33249 | "// OpenCL v1.1 s6.11.2, v1.2 s6.12.2, v2.0 s6.13.2 - Math functions\n" |
| 33250 | "\n" |
| 33251 | "/**\n" |
| 33252 | " * Arc cosine function.\n" |
| 33253 | " */\n" |
| 33254 | "float __ovld __cnfn acos(float);\n" |
| 33255 | "float2 __ovld __cnfn acos(float2);\n" |
| 33256 | "float3 __ovld __cnfn acos(float3);\n" |
| 33257 | "float4 __ovld __cnfn acos(float4);\n" |
| 33258 | "float8 __ovld __cnfn acos(float8);\n" |
| 33259 | "float16 __ovld __cnfn acos(float16);\n" |
| 33260 | "#ifdef cl_khr_fp64\n" |
| 33261 | "double __ovld __cnfn acos(double);\n" |
| 33262 | "double2 __ovld __cnfn acos(double2);\n" |
| 33263 | "double3 __ovld __cnfn acos(double3);\n" |
| 33264 | "double4 __ovld __cnfn acos(double4);\n" |
| 33265 | "double8 __ovld __cnfn acos(double8);\n" |
| 33266 | "double16 __ovld __cnfn acos(double16);\n" |
| 33267 | "#endif //cl_khr_fp64\n" |
| 33268 | "#ifdef cl_khr_fp16\n" |
| 33269 | "half __ovld __cnfn acos(half);\n" |
| 33270 | "half2 __ovld __cnfn acos(half2);\n" |
| 33271 | "half3 __ovld __cnfn acos(half3);\n" |
| 33272 | "half4 __ovld __cnfn acos(half4);\n" |
| 33273 | "half8 __ovld __cnfn acos(half8);\n" |
| 33274 | "half16 __ovld __cnfn acos(half16);\n" |
| 33275 | "#endif //cl_khr_fp16\n" |
| 33276 | "\n" |
| 33277 | "/**\n" |
| 33278 | " * Inverse hyperbolic cosine.\n" |
| 33279 | " */\n" |
| 33280 | "float __ovld __cnfn acosh(float);\n" |
| 33281 | "float2 __ovld __cnfn acosh(float2);\n" |
| 33282 | "float3 __ovld __cnfn acosh(float3);\n" |
| 33283 | "float4 __ovld __cnfn acosh(float4);\n" |
| 33284 | "float8 __ovld __cnfn acosh(float8);\n" |
| 33285 | "float16 __ovld __cnfn acosh(float16);\n" |
| 33286 | "#ifdef cl_khr_fp64\n" |
| 33287 | "double __ovld __cnfn acosh(double);\n" |
| 33288 | "double2 __ovld __cnfn acosh(double2);\n" |
| 33289 | "double3 __ovld __cnfn acosh(double3);\n" |
| 33290 | "double4 __ovld __cnfn acosh(double4);\n" |
| 33291 | "double8 __ovld __cnfn acosh(double8);\n" |
| 33292 | "double16 __ovld __cnfn acosh(double16);\n" |
| 33293 | "#endif //cl_khr_fp64\n" |
| 33294 | "#ifdef cl_khr_fp16\n" |
| 33295 | "half __ovld __cnfn acosh(half);\n" |
| 33296 | "half2 __ovld __cnfn acosh(half2);\n" |
| 33297 | "half3 __ovld __cnfn acosh(half3);\n" |
| 33298 | "half4 __ovld __cnfn acosh(half4);\n" |
| 33299 | "half8 __ovld __cnfn acosh(half8);\n" |
| 33300 | "half16 __ovld __cnfn acosh(half16);\n" |
| 33301 | "#endif //cl_khr_fp16\n" |
| 33302 | "\n" |
| 33303 | "/**\n" |
| 33304 | " * Compute acos (x) / PI.\n" |
| 33305 | " */\n" |
| 33306 | "float __ovld __cnfn acospi(float x);\n" |
| 33307 | "float2 __ovld __cnfn acospi(float2 x);\n" |
| 33308 | "float3 __ovld __cnfn acospi(float3 x);\n" |
| 33309 | "float4 __ovld __cnfn acospi(float4 x);\n" |
| 33310 | "float8 __ovld __cnfn acospi(float8 x);\n" |
| 33311 | "float16 __ovld __cnfn acospi(float16 x);\n" |
| 33312 | "#ifdef cl_khr_fp64\n" |
| 33313 | "double __ovld __cnfn acospi(double x);\n" |
| 33314 | "double2 __ovld __cnfn acospi(double2 x);\n" |
| 33315 | "double3 __ovld __cnfn acospi(double3 x);\n" |
| 33316 | "double4 __ovld __cnfn acospi(double4 x);\n" |
| 33317 | "double8 __ovld __cnfn acospi(double8 x);\n" |
| 33318 | "double16 __ovld __cnfn acospi(double16 x);\n" |
| 33319 | "#endif //cl_khr_fp64\n" |
| 33320 | "#ifdef cl_khr_fp16\n" |
| 33321 | "half __ovld __cnfn acospi(half x);\n" |
| 33322 | "half2 __ovld __cnfn acospi(half2 x);\n" |
| 33323 | "half3 __ovld __cnfn acospi(half3 x);\n" |
| 33324 | "half4 __ovld __cnfn acospi(half4 x);\n" |
| 33325 | "half8 __ovld __cnfn acospi(half8 x);\n" |
| 33326 | "half16 __ovld __cnfn acospi(half16 x);\n" |
| 33327 | "#endif //cl_khr_fp16\n" |
| 33328 | "\n" |
| 33329 | "/**\n" |
| 33330 | " * Arc sine function.\n" |
| 33331 | " */\n" |
| 33332 | "float __ovld __cnfn asin(float);\n" |
| 33333 | "float2 __ovld __cnfn asin(float2);\n" |
| 33334 | "float3 __ovld __cnfn asin(float3);\n" |
| 33335 | "float4 __ovld __cnfn asin(float4);\n" |
| 33336 | "float8 __ovld __cnfn asin(float8);\n" |
| 33337 | "float16 __ovld __cnfn asin(float16);\n" |
| 33338 | "#ifdef cl_khr_fp64\n" |
| 33339 | "double __ovld __cnfn asin(double);\n" |
| 33340 | "double2 __ovld __cnfn asin(double2);\n" |
| 33341 | "double3 __ovld __cnfn asin(double3);\n" |
| 33342 | "double4 __ovld __cnfn asin(double4);\n" |
| 33343 | "double8 __ovld __cnfn asin(double8);\n" |
| 33344 | "double16 __ovld __cnfn asin(double16);\n" |
| 33345 | "#endif //cl_khr_fp64\n" |
| 33346 | "#ifdef cl_khr_fp16\n" |
| 33347 | "half __ovld __cnfn asin(half);\n" |
| 33348 | "half2 __ovld __cnfn asin(half2);\n" |
| 33349 | "half3 __ovld __cnfn asin(half3);\n" |
| 33350 | "half4 __ovld __cnfn asin(half4);\n" |
| 33351 | "half8 __ovld __cnfn asin(half8);\n" |
| 33352 | "half16 __ovld __cnfn asin(half16);\n" |
| 33353 | "#endif //cl_khr_fp16\n" |
| 33354 | "\n" |
| 33355 | "/**\n" |
| 33356 | " * Inverse hyperbolic sine.\n" |
| 33357 | " */\n" |
| 33358 | "float __ovld __cnfn asinh(float);\n" |
| 33359 | "float2 __ovld __cnfn asinh(float2);\n" |
| 33360 | "float3 __ovld __cnfn asinh(float3);\n" |
| 33361 | "float4 __ovld __cnfn asinh(float4);\n" |
| 33362 | "float8 __ovld __cnfn asinh(float8);\n" |
| 33363 | "float16 __ovld __cnfn asinh(float16);\n" |
| 33364 | "#ifdef cl_khr_fp64\n" |
| 33365 | "double __ovld __cnfn asinh(double);\n" |
| 33366 | "double2 __ovld __cnfn asinh(double2);\n" |
| 33367 | "double3 __ovld __cnfn asinh(double3);\n" |
| 33368 | "double4 __ovld __cnfn asinh(double4);\n" |
| 33369 | "double8 __ovld __cnfn asinh(double8);\n" |
| 33370 | "double16 __ovld __cnfn asinh(double16);\n" |
| 33371 | "#endif //cl_khr_fp64\n" |
| 33372 | "#ifdef cl_khr_fp16\n" |
| 33373 | "half __ovld __cnfn asinh(half);\n" |
| 33374 | "half2 __ovld __cnfn asinh(half2);\n" |
| 33375 | "half3 __ovld __cnfn asinh(half3);\n" |
| 33376 | "half4 __ovld __cnfn asinh(half4);\n" |
| 33377 | "half8 __ovld __cnfn asinh(half8);\n" |
| 33378 | "half16 __ovld __cnfn asinh(half16);\n" |
| 33379 | "#endif //cl_khr_fp16\n" |
| 33380 | "\n" |
| 33381 | "/**\n" |
| 33382 | " * Compute asin (x) / PI.\n" |
| 33383 | " */\n" |
| 33384 | "float __ovld __cnfn asinpi(float x);\n" |
| 33385 | "float2 __ovld __cnfn asinpi(float2 x);\n" |
| 33386 | "float3 __ovld __cnfn asinpi(float3 x);\n" |
| 33387 | "float4 __ovld __cnfn asinpi(float4 x);\n" |
| 33388 | "float8 __ovld __cnfn asinpi(float8 x);\n" |
| 33389 | "float16 __ovld __cnfn asinpi(float16 x);\n" |
| 33390 | "#ifdef cl_khr_fp64\n" |
| 33391 | "double __ovld __cnfn asinpi(double x);\n" |
| 33392 | "double2 __ovld __cnfn asinpi(double2 x);\n" |
| 33393 | "double3 __ovld __cnfn asinpi(double3 x);\n" |
| 33394 | "double4 __ovld __cnfn asinpi(double4 x);\n" |
| 33395 | "double8 __ovld __cnfn asinpi(double8 x);\n" |
| 33396 | "double16 __ovld __cnfn asinpi(double16 x);\n" |
| 33397 | "#endif //cl_khr_fp64\n" |
| 33398 | "#ifdef cl_khr_fp16\n" |
| 33399 | "half __ovld __cnfn asinpi(half x);\n" |
| 33400 | "half2 __ovld __cnfn asinpi(half2 x);\n" |
| 33401 | "half3 __ovld __cnfn asinpi(half3 x);\n" |
| 33402 | "half4 __ovld __cnfn asinpi(half4 x);\n" |
| 33403 | "half8 __ovld __cnfn asinpi(half8 x);\n" |
| 33404 | "half16 __ovld __cnfn asinpi(half16 x);\n" |
| 33405 | "#endif //cl_khr_fp16\n" |
| 33406 | "\n" |
| 33407 | "/**\n" |
| 33408 | " * Arc tangent function.\n" |
| 33409 | " */\n" |
| 33410 | "float __ovld __cnfn atan(float y_over_x);\n" |
| 33411 | "float2 __ovld __cnfn atan(float2 y_over_x);\n" |
| 33412 | "float3 __ovld __cnfn atan(float3 y_over_x);\n" |
| 33413 | "float4 __ovld __cnfn atan(float4 y_over_x);\n" |
| 33414 | "float8 __ovld __cnfn atan(float8 y_over_x);\n" |
| 33415 | "float16 __ovld __cnfn atan(float16 y_over_x);\n" |
| 33416 | "#ifdef cl_khr_fp64\n" |
| 33417 | "double __ovld __cnfn atan(double y_over_x);\n" |
| 33418 | "double2 __ovld __cnfn atan(double2 y_over_x);\n" |
| 33419 | "double3 __ovld __cnfn atan(double3 y_over_x);\n" |
| 33420 | "double4 __ovld __cnfn atan(double4 y_over_x);\n" |
| 33421 | "double8 __ovld __cnfn atan(double8 y_over_x);\n" |
| 33422 | "double16 __ovld __cnfn atan(double16 y_over_x);\n" |
| 33423 | "#endif //cl_khr_fp64\n" |
| 33424 | "#ifdef cl_khr_fp16\n" |
| 33425 | "half __ovld __cnfn atan(half y_over_x);\n" |
| 33426 | "half2 __ovld __cnfn atan(half2 y_over_x);\n" |
| 33427 | "half3 __ovld __cnfn atan(half3 y_over_x);\n" |
| 33428 | "half4 __ovld __cnfn atan(half4 y_over_x);\n" |
| 33429 | "half8 __ovld __cnfn atan(half8 y_over_x);\n" |
| 33430 | "half16 __ovld __cnfn atan(half16 y_over_x);\n" |
| 33431 | "#endif //cl_khr_fp16\n" |
| 33432 | "\n" |
| 33433 | "/**\n" |
| 33434 | " * Arc tangent of y / x.\n" |
| 33435 | " */\n" |
| 33436 | "float __ovld __cnfn atan2(float y, float x);\n" |
| 33437 | "float2 __ovld __cnfn atan2(float2 y, float2 x);\n" |
| 33438 | "float3 __ovld __cnfn atan2(float3 y, float3 x);\n" |
| 33439 | "float4 __ovld __cnfn atan2(float4 y, float4 x);\n" |
| 33440 | "float8 __ovld __cnfn atan2(float8 y, float8 x);\n" |
| 33441 | "float16 __ovld __cnfn atan2(float16 y, float16 x);\n" |
| 33442 | "#ifdef cl_khr_fp64\n" |
| 33443 | "double __ovld __cnfn atan2(double y, double x);\n" |
| 33444 | "double2 __ovld __cnfn atan2(double2 y, double2 x);\n" |
| 33445 | "double3 __ovld __cnfn atan2(double3 y, double3 x);\n" |
| 33446 | "double4 __ovld __cnfn atan2(double4 y, double4 x);\n" |
| 33447 | "double8 __ovld __cnfn atan2(double8 y, double8 x);\n" |
| 33448 | "double16 __ovld __cnfn atan2(double16 y, double16 x);\n" |
| 33449 | "#endif //cl_khr_fp64\n" |
| 33450 | "#ifdef cl_khr_fp16\n" |
| 33451 | "half __ovld __cnfn atan2(half y, half x);\n" |
| 33452 | "half2 __ovld __cnfn atan2(half2 y, half2 x);\n" |
| 33453 | "half3 __ovld __cnfn atan2(half3 y, half3 x);\n" |
| 33454 | "half4 __ovld __cnfn atan2(half4 y, half4 x);\n" |
| 33455 | "half8 __ovld __cnfn atan2(half8 y, half8 x);\n" |
| 33456 | "half16 __ovld __cnfn atan2(half16 y, half16 x);\n" |
| 33457 | "#endif //cl_khr_fp16\n" |
| 33458 | "\n" |
| 33459 | "/**\n" |
| 33460 | " * Hyperbolic arc tangent.\n" |
| 33461 | " */\n" |
| 33462 | "float __ovld __cnfn atanh(float);\n" |
| 33463 | "float2 __ovld __cnfn atanh(float2);\n" |
| 33464 | "float3 __ovld __cnfn atanh(float3);\n" |
| 33465 | "float4 __ovld __cnfn atanh(float4);\n" |
| 33466 | "float8 __ovld __cnfn atanh(float8);\n" |
| 33467 | "float16 __ovld __cnfn atanh(float16);\n" |
| 33468 | "#ifdef cl_khr_fp64\n" |
| 33469 | "double __ovld __cnfn atanh(double);\n" |
| 33470 | "double2 __ovld __cnfn atanh(double2);\n" |
| 33471 | "double3 __ovld __cnfn atanh(double3);\n" |
| 33472 | "double4 __ovld __cnfn atanh(double4);\n" |
| 33473 | "double8 __ovld __cnfn atanh(double8);\n" |
| 33474 | "double16 __ovld __cnfn atanh(double16);\n" |
| 33475 | "#endif //cl_khr_fp64\n" |
| 33476 | "#ifdef cl_khr_fp16\n" |
| 33477 | "half __ovld __cnfn atanh(half);\n" |
| 33478 | "half2 __ovld __cnfn atanh(half2);\n" |
| 33479 | "half3 __ovld __cnfn atanh(half3);\n" |
| 33480 | "half4 __ovld __cnfn atanh(half4);\n" |
| 33481 | "half8 __ovld __cnfn atanh(half8);\n" |
| 33482 | "half16 __ovld __cnfn atanh(half16);\n" |
| 33483 | "#endif //cl_khr_fp16\n" |
| 33484 | "\n" |
| 33485 | "/**\n" |
| 33486 | " * Compute atan (x) / PI.\n" |
| 33487 | " */\n" |
| 33488 | "float __ovld __cnfn atanpi(float x);\n" |
| 33489 | "float2 __ovld __cnfn atanpi(float2 x);\n" |
| 33490 | "float3 __ovld __cnfn atanpi(float3 x);\n" |
| 33491 | "float4 __ovld __cnfn atanpi(float4 x);\n" |
| 33492 | "float8 __ovld __cnfn atanpi(float8 x);\n" |
| 33493 | "float16 __ovld __cnfn atanpi(float16 x);\n" |
| 33494 | "#ifdef cl_khr_fp64\n" |
| 33495 | "double __ovld __cnfn atanpi(double x);\n" |
| 33496 | "double2 __ovld __cnfn atanpi(double2 x);\n" |
| 33497 | "double3 __ovld __cnfn atanpi(double3 x);\n" |
| 33498 | "double4 __ovld __cnfn atanpi(double4 x);\n" |
| 33499 | "double8 __ovld __cnfn atanpi(double8 x);\n" |
| 33500 | "double16 __ovld __cnfn atanpi(double16 x);\n" |
| 33501 | "#endif //cl_khr_fp64\n" |
| 33502 | "#ifdef cl_khr_fp16\n" |
| 33503 | "half __ovld __cnfn atanpi(half x);\n" |
| 33504 | "half2 __ovld __cnfn atanpi(half2 x);\n" |
| 33505 | "half3 __ovld __cnfn atanpi(half3 x);\n" |
| 33506 | "half4 __ovld __cnfn atanpi(half4 x);\n" |
| 33507 | "half8 __ovld __cnfn atanpi(half8 x);\n" |
| 33508 | "half16 __ovld __cnfn atanpi(half16 x);\n" |
| 33509 | "#endif //cl_khr_fp16\n" |
| 33510 | "\n" |
| 33511 | "/**\n" |
| 33512 | " * Compute atan2 (y, x) / PI.\n" |
| 33513 | " */\n" |
| 33514 | "float __ovld __cnfn atan2pi(float y, float x);\n" |
| 33515 | "float2 __ovld __cnfn atan2pi(float2 y, float2 x);\n" |
| 33516 | "float3 __ovld __cnfn atan2pi(float3 y, float3 x);\n" |
| 33517 | "float4 __ovld __cnfn atan2pi(float4 y, float4 x);\n" |
| 33518 | "float8 __ovld __cnfn atan2pi(float8 y, float8 x);\n" |
| 33519 | "float16 __ovld __cnfn atan2pi(float16 y, float16 x);\n" |
| 33520 | "#ifdef cl_khr_fp64\n" |
| 33521 | "double __ovld __cnfn atan2pi(double y, double x);\n" |
| 33522 | "double2 __ovld __cnfn atan2pi(double2 y, double2 x);\n" |
| 33523 | "double3 __ovld __cnfn atan2pi(double3 y, double3 x);\n" |
| 33524 | "double4 __ovld __cnfn atan2pi(double4 y, double4 x);\n" |
| 33525 | "double8 __ovld __cnfn atan2pi(double8 y, double8 x);\n" |
| 33526 | "double16 __ovld __cnfn atan2pi(double16 y, double16 x);\n" |
| 33527 | "#endif //cl_khr_fp64\n" |
| 33528 | "#ifdef cl_khr_fp16\n" |
| 33529 | "half __ovld __cnfn atan2pi(half y, half x);\n" |
| 33530 | "half2 __ovld __cnfn atan2pi(half2 y, half2 x);\n" |
| 33531 | "half3 __ovld __cnfn atan2pi(half3 y, half3 x);\n" |
| 33532 | "half4 __ovld __cnfn atan2pi(half4 y, half4 x);\n" |
| 33533 | "half8 __ovld __cnfn atan2pi(half8 y, half8 x);\n" |
| 33534 | "half16 __ovld __cnfn atan2pi(half16 y, half16 x);\n" |
| 33535 | "#endif //cl_khr_fp16\n" |
| 33536 | "\n" |
| 33537 | "/**\n" |
| 33538 | " * Compute cube-root.\n" |
| 33539 | " */\n" |
| 33540 | "float __ovld __cnfn cbrt(float);\n" |
| 33541 | "float2 __ovld __cnfn cbrt(float2);\n" |
| 33542 | "float3 __ovld __cnfn cbrt(float3);\n" |
| 33543 | "float4 __ovld __cnfn cbrt(float4);\n" |
| 33544 | "float8 __ovld __cnfn cbrt(float8);\n" |
| 33545 | "float16 __ovld __cnfn cbrt(float16);\n" |
| 33546 | "#ifdef cl_khr_fp64\n" |
| 33547 | "double __ovld __cnfn cbrt(double);\n" |
| 33548 | "double2 __ovld __cnfn cbrt(double2);\n" |
| 33549 | "double3 __ovld __cnfn cbrt(double3);\n" |
| 33550 | "double4 __ovld __cnfn cbrt(double4);\n" |
| 33551 | "double8 __ovld __cnfn cbrt(double8);\n" |
| 33552 | "double16 __ovld __cnfn cbrt(double16);\n" |
| 33553 | "#endif //cl_khr_fp64\n" |
| 33554 | "#ifdef cl_khr_fp16\n" |
| 33555 | "half __ovld __cnfn cbrt(half);\n" |
| 33556 | "half2 __ovld __cnfn cbrt(half2);\n" |
| 33557 | "half3 __ovld __cnfn cbrt(half3);\n" |
| 33558 | "half4 __ovld __cnfn cbrt(half4);\n" |
| 33559 | "half8 __ovld __cnfn cbrt(half8);\n" |
| 33560 | "half16 __ovld __cnfn cbrt(half16);\n" |
| 33561 | "#endif //cl_khr_fp16\n" |
| 33562 | "\n" |
| 33563 | "/**\n" |
| 33564 | " * Round to integral value using the round to positive\n" |
| 33565 | " * infinity rounding mode.\n" |
| 33566 | " */\n" |
| 33567 | "float __ovld __cnfn ceil(float);\n" |
| 33568 | "float2 __ovld __cnfn ceil(float2);\n" |
| 33569 | "float3 __ovld __cnfn ceil(float3);\n" |
| 33570 | "float4 __ovld __cnfn ceil(float4);\n" |
| 33571 | "float8 __ovld __cnfn ceil(float8);\n" |
| 33572 | "float16 __ovld __cnfn ceil(float16);\n" |
| 33573 | "#ifdef cl_khr_fp64\n" |
| 33574 | "double __ovld __cnfn ceil(double);\n" |
| 33575 | "double2 __ovld __cnfn ceil(double2);\n" |
| 33576 | "double3 __ovld __cnfn ceil(double3);\n" |
| 33577 | "double4 __ovld __cnfn ceil(double4);\n" |
| 33578 | "double8 __ovld __cnfn ceil(double8);\n" |
| 33579 | "double16 __ovld __cnfn ceil(double16);\n" |
| 33580 | "#endif //cl_khr_fp64\n" |
| 33581 | "#ifdef cl_khr_fp16\n" |
| 33582 | "half __ovld __cnfn ceil(half);\n" |
| 33583 | "half2 __ovld __cnfn ceil(half2);\n" |
| 33584 | "half3 __ovld __cnfn ceil(half3);\n" |
| 33585 | "half4 __ovld __cnfn ceil(half4);\n" |
| 33586 | "half8 __ovld __cnfn ceil(half8);\n" |
| 33587 | "half16 __ovld __cnfn ceil(half16);\n" |
| 33588 | "#endif //cl_khr_fp16\n" |
| 33589 | "\n" |
| 33590 | "/**\n" |
| 33591 | " * Returns x with its sign changed to match the sign of y.\n" |
| 33592 | " */\n" |
| 33593 | "float __ovld __cnfn copysign(float x, float y);\n" |
| 33594 | "float2 __ovld __cnfn copysign(float2 x, float2 y);\n" |
| 33595 | "float3 __ovld __cnfn copysign(float3 x, float3 y);\n" |
| 33596 | "float4 __ovld __cnfn copysign(float4 x, float4 y);\n" |
| 33597 | "float8 __ovld __cnfn copysign(float8 x, float8 y);\n" |
| 33598 | "float16 __ovld __cnfn copysign(float16 x, float16 y);\n" |
| 33599 | "#ifdef cl_khr_fp64\n" |
| 33600 | "double __ovld __cnfn copysign(double x, double y);\n" |
| 33601 | "double2 __ovld __cnfn copysign(double2 x, double2 y);\n" |
| 33602 | "double3 __ovld __cnfn copysign(double3 x, double3 y);\n" |
| 33603 | "double4 __ovld __cnfn copysign(double4 x, double4 y);\n" |
| 33604 | "double8 __ovld __cnfn copysign(double8 x, double8 y);\n" |
| 33605 | "double16 __ovld __cnfn copysign(double16 x, double16 y);\n" |
| 33606 | "#endif //cl_khr_fp64\n" |
| 33607 | "#ifdef cl_khr_fp16\n" |
| 33608 | "half __ovld __cnfn copysign(half x, half y);\n" |
| 33609 | "half2 __ovld __cnfn copysign(half2 x, half2 y);\n" |
| 33610 | "half3 __ovld __cnfn copysign(half3 x, half3 y);\n" |
| 33611 | "half4 __ovld __cnfn copysign(half4 x, half4 y);\n" |
| 33612 | "half8 __ovld __cnfn copysign(half8 x, half8 y);\n" |
| 33613 | "half16 __ovld __cnfn copysign(half16 x, half16 y);\n" |
| 33614 | "#endif //cl_khr_fp16\n" |
| 33615 | "\n" |
| 33616 | "/**\n" |
| 33617 | " * Compute cosine.\n" |
| 33618 | " */\n" |
| 33619 | "float __ovld __cnfn cos(float);\n" |
| 33620 | "float2 __ovld __cnfn cos(float2);\n" |
| 33621 | "float3 __ovld __cnfn cos(float3);\n" |
| 33622 | "float4 __ovld __cnfn cos(float4);\n" |
| 33623 | "float8 __ovld __cnfn cos(float8);\n" |
| 33624 | "float16 __ovld __cnfn cos(float16);\n" |
| 33625 | "#ifdef cl_khr_fp64\n" |
| 33626 | "double __ovld __cnfn cos(double);\n" |
| 33627 | "double2 __ovld __cnfn cos(double2);\n" |
| 33628 | "double3 __ovld __cnfn cos(double3);\n" |
| 33629 | "double4 __ovld __cnfn cos(double4);\n" |
| 33630 | "double8 __ovld __cnfn cos(double8);\n" |
| 33631 | "double16 __ovld __cnfn cos(double16);\n" |
| 33632 | "#endif //cl_khr_fp64\n" |
| 33633 | "#ifdef cl_khr_fp16\n" |
| 33634 | "half __ovld __cnfn cos(half);\n" |
| 33635 | "half2 __ovld __cnfn cos(half2);\n" |
| 33636 | "half3 __ovld __cnfn cos(half3);\n" |
| 33637 | "half4 __ovld __cnfn cos(half4);\n" |
| 33638 | "half8 __ovld __cnfn cos(half8);\n" |
| 33639 | "half16 __ovld __cnfn cos(half16);\n" |
| 33640 | "#endif //cl_khr_fp16\n" |
| 33641 | "\n" |
| 33642 | "/**\n" |
| 33643 | " * Compute hyperbolic cosine.\n" |
| 33644 | " */\n" |
| 33645 | "float __ovld __cnfn cosh(float);\n" |
| 33646 | "float2 __ovld __cnfn cosh(float2);\n" |
| 33647 | "float3 __ovld __cnfn cosh(float3);\n" |
| 33648 | "float4 __ovld __cnfn cosh(float4);\n" |
| 33649 | "float8 __ovld __cnfn cosh(float8);\n" |
| 33650 | "float16 __ovld __cnfn cosh(float16);\n" |
| 33651 | "#ifdef cl_khr_fp64\n" |
| 33652 | "double __ovld __cnfn cosh(double);\n" |
| 33653 | "double2 __ovld __cnfn cosh(double2);\n" |
| 33654 | "double3 __ovld __cnfn cosh(double3);\n" |
| 33655 | "double4 __ovld __cnfn cosh(double4);\n" |
| 33656 | "double8 __ovld __cnfn cosh(double8);\n" |
| 33657 | "double16 __ovld __cnfn cosh(double16);\n" |
| 33658 | "#endif //cl_khr_fp64\n" |
| 33659 | "#ifdef cl_khr_fp16\n" |
| 33660 | "half __ovld __cnfn cosh(half);\n" |
| 33661 | "half2 __ovld __cnfn cosh(half2);\n" |
| 33662 | "half3 __ovld __cnfn cosh(half3);\n" |
| 33663 | "half4 __ovld __cnfn cosh(half4);\n" |
| 33664 | "half8 __ovld __cnfn cosh(half8);\n" |
| 33665 | "half16 __ovld __cnfn cosh(half16);\n" |
| 33666 | "#endif //cl_khr_fp16\n" |
| 33667 | "\n" |
| 33668 | "/**\n" |
| 33669 | " * Compute cos (PI * x).\n" |
| 33670 | " */\n" |
| 33671 | "float __ovld __cnfn cospi(float x);\n" |
| 33672 | "float2 __ovld __cnfn cospi(float2 x);\n" |
| 33673 | "float3 __ovld __cnfn cospi(float3 x);\n" |
| 33674 | "float4 __ovld __cnfn cospi(float4 x);\n" |
| 33675 | "float8 __ovld __cnfn cospi(float8 x);\n" |
| 33676 | "float16 __ovld __cnfn cospi(float16 x);\n" |
| 33677 | "#ifdef cl_khr_fp64\n" |
| 33678 | "double __ovld __cnfn cospi(double x);\n" |
| 33679 | "double2 __ovld __cnfn cospi(double2 x);\n" |
| 33680 | "double3 __ovld __cnfn cospi(double3 x);\n" |
| 33681 | "double4 __ovld __cnfn cospi(double4 x);\n" |
| 33682 | "double8 __ovld __cnfn cospi(double8 x);\n" |
| 33683 | "double16 __ovld __cnfn cospi(double16 x);\n" |
| 33684 | "#endif //cl_khr_fp64\n" |
| 33685 | "#ifdef cl_khr_fp16\n" |
| 33686 | "half __ovld __cnfn cospi(half x);\n" |
| 33687 | "half2 __ovld __cnfn cospi(half2 x);\n" |
| 33688 | "half3 __ovld __cnfn cospi(half3 x);\n" |
| 33689 | "half4 __ovld __cnfn cospi(half4 x);\n" |
| 33690 | "half8 __ovld __cnfn cospi(half8 x);\n" |
| 33691 | "half16 __ovld __cnfn cospi(half16 x);\n" |
| 33692 | "#endif //cl_khr_fp16\n" |
| 33693 | "\n" |
| 33694 | "/**\n" |
| 33695 | " * Complementary error function.\n" |
| 33696 | " */\n" |
| 33697 | "float __ovld __cnfn erfc(float);\n" |
| 33698 | "float2 __ovld __cnfn erfc(float2);\n" |
| 33699 | "float3 __ovld __cnfn erfc(float3);\n" |
| 33700 | "float4 __ovld __cnfn erfc(float4);\n" |
| 33701 | "float8 __ovld __cnfn erfc(float8);\n" |
| 33702 | "float16 __ovld __cnfn erfc(float16);\n" |
| 33703 | "#ifdef cl_khr_fp64\n" |
| 33704 | "double __ovld __cnfn erfc(double);\n" |
| 33705 | "double2 __ovld __cnfn erfc(double2);\n" |
| 33706 | "double3 __ovld __cnfn erfc(double3);\n" |
| 33707 | "double4 __ovld __cnfn erfc(double4);\n" |
| 33708 | "double8 __ovld __cnfn erfc(double8);\n" |
| 33709 | "double16 __ovld __cnfn erfc(double16);\n" |
| 33710 | "#endif //cl_khr_fp64\n" |
| 33711 | "#ifdef cl_khr_fp16\n" |
| 33712 | "half __ovld __cnfn erfc(half);\n" |
| 33713 | "half2 __ovld __cnfn erfc(half2);\n" |
| 33714 | "half3 __ovld __cnfn erfc(half3);\n" |
| 33715 | "half4 __ovld __cnfn erfc(half4);\n" |
| 33716 | "half8 __ovld __cnfn erfc(half8);\n" |
| 33717 | "half16 __ovld __cnfn erfc(half16);\n" |
| 33718 | "#endif //cl_khr_fp16\n" |
| 33719 | "\n" |
| 33720 | "/**\n" |
| 33721 | " * Error function encountered in integrating the\n" |
| 33722 | " * normal distribution.\n" |
| 33723 | " */\n" |
| 33724 | "float __ovld __cnfn erf(float);\n" |
| 33725 | "float2 __ovld __cnfn erf(float2);\n" |
| 33726 | "float3 __ovld __cnfn erf(float3);\n" |
| 33727 | "float4 __ovld __cnfn erf(float4);\n" |
| 33728 | "float8 __ovld __cnfn erf(float8);\n" |
| 33729 | "float16 __ovld __cnfn erf(float16);\n" |
| 33730 | "#ifdef cl_khr_fp64\n" |
| 33731 | "double __ovld __cnfn erf(double);\n" |
| 33732 | "double2 __ovld __cnfn erf(double2);\n" |
| 33733 | "double3 __ovld __cnfn erf(double3);\n" |
| 33734 | "double4 __ovld __cnfn erf(double4);\n" |
| 33735 | "double8 __ovld __cnfn erf(double8);\n" |
| 33736 | "double16 __ovld __cnfn erf(double16);\n" |
| 33737 | "#endif //cl_khr_fp64\n" |
| 33738 | "#ifdef cl_khr_fp16\n" |
| 33739 | "half __ovld __cnfn erf(half);\n" |
| 33740 | "half2 __ovld __cnfn erf(half2);\n" |
| 33741 | "half3 __ovld __cnfn erf(half3);\n" |
| 33742 | "half4 __ovld __cnfn erf(half4);\n" |
| 33743 | "half8 __ovld __cnfn erf(half8);\n" |
| 33744 | "half16 __ovld __cnfn erf(half16);\n" |
| 33745 | "#endif //cl_khr_fp16\n" |
| 33746 | "\n" |
| 33747 | "/**\n" |
| 33748 | " * Compute the base e exponential function of x.\n" |
| 33749 | " */\n" |
| 33750 | "float __ovld __cnfn exp(float x);\n" |
| 33751 | "float2 __ovld __cnfn exp(float2 x);\n" |
| 33752 | "float3 __ovld __cnfn exp(float3 x);\n" |
| 33753 | "float4 __ovld __cnfn exp(float4 x);\n" |
| 33754 | "float8 __ovld __cnfn exp(float8 x);\n" |
| 33755 | "float16 __ovld __cnfn exp(float16 x);\n" |
| 33756 | "#ifdef cl_khr_fp64\n" |
| 33757 | "double __ovld __cnfn exp(double x);\n" |
| 33758 | "double2 __ovld __cnfn exp(double2 x);\n" |
| 33759 | "double3 __ovld __cnfn exp(double3 x);\n" |
| 33760 | "double4 __ovld __cnfn exp(double4 x);\n" |
| 33761 | "double8 __ovld __cnfn exp(double8 x);\n" |
| 33762 | "double16 __ovld __cnfn exp(double16 x);\n" |
| 33763 | "#endif //cl_khr_fp64\n" |
| 33764 | "#ifdef cl_khr_fp16\n" |
| 33765 | "half __ovld __cnfn exp(half x);\n" |
| 33766 | "half2 __ovld __cnfn exp(half2 x);\n" |
| 33767 | "half3 __ovld __cnfn exp(half3 x);\n" |
| 33768 | "half4 __ovld __cnfn exp(half4 x);\n" |
| 33769 | "half8 __ovld __cnfn exp(half8 x);\n" |
| 33770 | "half16 __ovld __cnfn exp(half16 x);\n" |
| 33771 | "#endif //cl_khr_fp16\n" |
| 33772 | "\n" |
| 33773 | "/**\n" |
| 33774 | " * Exponential base 2 function.\n" |
| 33775 | " */\n" |
| 33776 | "float __ovld __cnfn exp2(float);\n" |
| 33777 | "float2 __ovld __cnfn exp2(float2);\n" |
| 33778 | "float3 __ovld __cnfn exp2(float3);\n" |
| 33779 | "float4 __ovld __cnfn exp2(float4);\n" |
| 33780 | "float8 __ovld __cnfn exp2(float8);\n" |
| 33781 | "float16 __ovld __cnfn exp2(float16);\n" |
| 33782 | "#ifdef cl_khr_fp64\n" |
| 33783 | "double __ovld __cnfn exp2(double);\n" |
| 33784 | "double2 __ovld __cnfn exp2(double2);\n" |
| 33785 | "double3 __ovld __cnfn exp2(double3);\n" |
| 33786 | "double4 __ovld __cnfn exp2(double4);\n" |
| 33787 | "double8 __ovld __cnfn exp2(double8);\n" |
| 33788 | "double16 __ovld __cnfn exp2(double16);\n" |
| 33789 | "#endif //cl_khr_fp64\n" |
| 33790 | "#ifdef cl_khr_fp16\n" |
| 33791 | "half __ovld __cnfn exp2(half);\n" |
| 33792 | "half2 __ovld __cnfn exp2(half2);\n" |
| 33793 | "half3 __ovld __cnfn exp2(half3);\n" |
| 33794 | "half4 __ovld __cnfn exp2(half4);\n" |
| 33795 | "half8 __ovld __cnfn exp2(half8);\n" |
| 33796 | "half16 __ovld __cnfn exp2(half16);\n" |
| 33797 | "#endif //cl_khr_fp16\n" |
| 33798 | "\n" |
| 33799 | "/**\n" |
| 33800 | " * Exponential base 10 function.\n" |
| 33801 | " */\n" |
| 33802 | "float __ovld __cnfn exp10(float);\n" |
| 33803 | "float2 __ovld __cnfn exp10(float2);\n" |
| 33804 | "float3 __ovld __cnfn exp10(float3);\n" |
| 33805 | "float4 __ovld __cnfn exp10(float4);\n" |
| 33806 | "float8 __ovld __cnfn exp10(float8);\n" |
| 33807 | "float16 __ovld __cnfn exp10(float16);\n" |
| 33808 | "#ifdef cl_khr_fp64\n" |
| 33809 | "double __ovld __cnfn exp10(double);\n" |
| 33810 | "double2 __ovld __cnfn exp10(double2);\n" |
| 33811 | "double3 __ovld __cnfn exp10(double3);\n" |
| 33812 | "double4 __ovld __cnfn exp10(double4);\n" |
| 33813 | "double8 __ovld __cnfn exp10(double8);\n" |
| 33814 | "double16 __ovld __cnfn exp10(double16);\n" |
| 33815 | "#endif //cl_khr_fp64\n" |
| 33816 | "#ifdef cl_khr_fp16\n" |
| 33817 | "half __ovld __cnfn exp10(half);\n" |
| 33818 | "half2 __ovld __cnfn exp10(half2);\n" |
| 33819 | "half3 __ovld __cnfn exp10(half3);\n" |
| 33820 | "half4 __ovld __cnfn exp10(half4);\n" |
| 33821 | "half8 __ovld __cnfn exp10(half8);\n" |
| 33822 | "half16 __ovld __cnfn exp10(half16);\n" |
| 33823 | "#endif //cl_khr_fp16\n" |
| 33824 | "\n" |
| 33825 | "/**\n" |
| 33826 | " * Compute e^x- 1.0.\n" |
| 33827 | " */\n" |
| 33828 | "float __ovld __cnfn expm1(float x);\n" |
| 33829 | "float2 __ovld __cnfn expm1(float2 x);\n" |
| 33830 | "float3 __ovld __cnfn expm1(float3 x);\n" |
| 33831 | "float4 __ovld __cnfn expm1(float4 x);\n" |
| 33832 | "float8 __ovld __cnfn expm1(float8 x);\n" |
| 33833 | "float16 __ovld __cnfn expm1(float16 x);\n" |
| 33834 | "#ifdef cl_khr_fp64\n" |
| 33835 | "double __ovld __cnfn expm1(double x);\n" |
| 33836 | "double2 __ovld __cnfn expm1(double2 x);\n" |
| 33837 | "double3 __ovld __cnfn expm1(double3 x);\n" |
| 33838 | "double4 __ovld __cnfn expm1(double4 x);\n" |
| 33839 | "double8 __ovld __cnfn expm1(double8 x);\n" |
| 33840 | "double16 __ovld __cnfn expm1(double16 x);\n" |
| 33841 | "#endif //cl_khr_fp64\n" |
| 33842 | "#ifdef cl_khr_fp16\n" |
| 33843 | "half __ovld __cnfn expm1(half x);\n" |
| 33844 | "half2 __ovld __cnfn expm1(half2 x);\n" |
| 33845 | "half3 __ovld __cnfn expm1(half3 x);\n" |
| 33846 | "half4 __ovld __cnfn expm1(half4 x);\n" |
| 33847 | "half8 __ovld __cnfn expm1(half8 x);\n" |
| 33848 | "half16 __ovld __cnfn expm1(half16 x);\n" |
| 33849 | "#endif //cl_khr_fp16\n" |
| 33850 | "\n" |
| 33851 | "/**\n" |
| 33852 | " * Compute absolute value of a floating-point number.\n" |
| 33853 | " */\n" |
| 33854 | "float __ovld __cnfn fabs(float);\n" |
| 33855 | "float2 __ovld __cnfn fabs(float2);\n" |
| 33856 | "float3 __ovld __cnfn fabs(float3);\n" |
| 33857 | "float4 __ovld __cnfn fabs(float4);\n" |
| 33858 | "float8 __ovld __cnfn fabs(float8);\n" |
| 33859 | "float16 __ovld __cnfn fabs(float16);\n" |
| 33860 | "#ifdef cl_khr_fp64\n" |
| 33861 | "double __ovld __cnfn fabs(double);\n" |
| 33862 | "double2 __ovld __cnfn fabs(double2);\n" |
| 33863 | "double3 __ovld __cnfn fabs(double3);\n" |
| 33864 | "double4 __ovld __cnfn fabs(double4);\n" |
| 33865 | "double8 __ovld __cnfn fabs(double8);\n" |
| 33866 | "double16 __ovld __cnfn fabs(double16);\n" |
| 33867 | "#endif //cl_khr_fp64\n" |
| 33868 | "#ifdef cl_khr_fp16\n" |
| 33869 | "half __ovld __cnfn fabs(half);\n" |
| 33870 | "half2 __ovld __cnfn fabs(half2);\n" |
| 33871 | "half3 __ovld __cnfn fabs(half3);\n" |
| 33872 | "half4 __ovld __cnfn fabs(half4);\n" |
| 33873 | "half8 __ovld __cnfn fabs(half8);\n" |
| 33874 | "half16 __ovld __cnfn fabs(half16);\n" |
| 33875 | "#endif //cl_khr_fp16\n" |
| 33876 | "\n" |
| 33877 | "/**\n" |
| 33878 | " * x - y if x > y, +0 if x is less than or equal to y.\n" |
| 33879 | " */\n" |
| 33880 | "float __ovld __cnfn fdim(float x, float y);\n" |
| 33881 | "float2 __ovld __cnfn fdim(float2 x, float2 y);\n" |
| 33882 | "float3 __ovld __cnfn fdim(float3 x, float3 y);\n" |
| 33883 | "float4 __ovld __cnfn fdim(float4 x, float4 y);\n" |
| 33884 | "float8 __ovld __cnfn fdim(float8 x, float8 y);\n" |
| 33885 | "float16 __ovld __cnfn fdim(float16 x, float16 y);\n" |
| 33886 | "#ifdef cl_khr_fp64\n" |
| 33887 | "double __ovld __cnfn fdim(double x, double y);\n" |
| 33888 | "double2 __ovld __cnfn fdim(double2 x, double2 y);\n" |
| 33889 | "double3 __ovld __cnfn fdim(double3 x, double3 y);\n" |
| 33890 | "double4 __ovld __cnfn fdim(double4 x, double4 y);\n" |
| 33891 | "double8 __ovld __cnfn fdim(double8 x, double8 y);\n" |
| 33892 | "double16 __ovld __cnfn fdim(double16 x, double16 y);\n" |
| 33893 | "#endif //cl_khr_fp64\n" |
| 33894 | "#ifdef cl_khr_fp16\n" |
| 33895 | "half __ovld __cnfn fdim(half x, half y);\n" |
| 33896 | "half2 __ovld __cnfn fdim(half2 x, half2 y);\n" |
| 33897 | "half3 __ovld __cnfn fdim(half3 x, half3 y);\n" |
| 33898 | "half4 __ovld __cnfn fdim(half4 x, half4 y);\n" |
| 33899 | "half8 __ovld __cnfn fdim(half8 x, half8 y);\n" |
| 33900 | "half16 __ovld __cnfn fdim(half16 x, half16 y);\n" |
| 33901 | "#endif //cl_khr_fp16\n" |
| 33902 | "\n" |
| 33903 | "/**\n" |
| 33904 | " * Round to integral value using the round to -ve\n" |
| 33905 | " * infinity rounding mode.\n" |
| 33906 | " */\n" |
| 33907 | "float __ovld __cnfn floor(float);\n" |
| 33908 | "float2 __ovld __cnfn floor(float2);\n" |
| 33909 | "float3 __ovld __cnfn floor(float3);\n" |
| 33910 | "float4 __ovld __cnfn floor(float4);\n" |
| 33911 | "float8 __ovld __cnfn floor(float8);\n" |
| 33912 | "float16 __ovld __cnfn floor(float16);\n" |
| 33913 | "#ifdef cl_khr_fp64\n" |
| 33914 | "double __ovld __cnfn floor(double);\n" |
| 33915 | "double2 __ovld __cnfn floor(double2);\n" |
| 33916 | "double3 __ovld __cnfn floor(double3);\n" |
| 33917 | "double4 __ovld __cnfn floor(double4);\n" |
| 33918 | "double8 __ovld __cnfn floor(double8);\n" |
| 33919 | "double16 __ovld __cnfn floor(double16);\n" |
| 33920 | "#endif //cl_khr_fp64\n" |
| 33921 | "#ifdef cl_khr_fp16\n" |
| 33922 | "half __ovld __cnfn floor(half);\n" |
| 33923 | "half2 __ovld __cnfn floor(half2);\n" |
| 33924 | "half3 __ovld __cnfn floor(half3);\n" |
| 33925 | "half4 __ovld __cnfn floor(half4);\n" |
| 33926 | "half8 __ovld __cnfn floor(half8);\n" |
| 33927 | "half16 __ovld __cnfn floor(half16);\n" |
| 33928 | "#endif //cl_khr_fp16\n" |
| 33929 | "\n" |
| 33930 | "/**\n" |
| 33931 | " * Returns the correctly rounded floating-point\n" |
| 33932 | " * representation of the sum of c with the infinitely\n" |
| 33933 | " * precise product of a and b. Rounding of\n" |
| 33934 | " * intermediate products shall not occur. Edge case\n" |
| 33935 | " * behavior is per the IEEE 754-2008 standard.\n" |
| 33936 | " */\n" |
| 33937 | "float __ovld __cnfn fma(float a, float b, float c);\n" |
| 33938 | "float2 __ovld __cnfn fma(float2 a, float2 b, float2 c);\n" |
| 33939 | "float3 __ovld __cnfn fma(float3 a, float3 b, float3 c);\n" |
| 33940 | "float4 __ovld __cnfn fma(float4 a, float4 b, float4 c);\n" |
| 33941 | "float8 __ovld __cnfn fma(float8 a, float8 b, float8 c);\n" |
| 33942 | "float16 __ovld __cnfn fma(float16 a, float16 b, float16 c);\n" |
| 33943 | "#ifdef cl_khr_fp64\n" |
| 33944 | "double __ovld __cnfn fma(double a, double b, double c);\n" |
| 33945 | "double2 __ovld __cnfn fma(double2 a, double2 b, double2 c);\n" |
| 33946 | "double3 __ovld __cnfn fma(double3 a, double3 b, double3 c);\n" |
| 33947 | "double4 __ovld __cnfn fma(double4 a, double4 b, double4 c);\n" |
| 33948 | "double8 __ovld __cnfn fma(double8 a, double8 b, double8 c);\n" |
| 33949 | "double16 __ovld __cnfn fma(double16 a, double16 b, double16 c);\n" |
| 33950 | "#endif //cl_khr_fp64\n" |
| 33951 | "#ifdef cl_khr_fp16\n" |
| 33952 | "half __ovld __cnfn fma(half a, half b, half c);\n" |
| 33953 | "half2 __ovld __cnfn fma(half2 a, half2 b, half2 c);\n" |
| 33954 | "half3 __ovld __cnfn fma(half3 a, half3 b, half3 c);\n" |
| 33955 | "half4 __ovld __cnfn fma(half4 a, half4 b, half4 c);\n" |
| 33956 | "half8 __ovld __cnfn fma(half8 a, half8 b, half8 c);\n" |
| 33957 | "half16 __ovld __cnfn fma(half16 a, half16 b, half16 c);\n" |
| 33958 | "#endif //cl_khr_fp16\n" |
| 33959 | "\n" |
| 33960 | "/**\n" |
| 33961 | " * Returns y if x < y, otherwise it returns x. If one\n" |
| 33962 | " * argument is a NaN, fmax() returns the other\n" |
| 33963 | " * argument. If both arguments are NaNs, fmax()\n" |
| 33964 | " * returns a NaN.\n" |
| 33965 | " */\n" |
| 33966 | "float __ovld __cnfn fmax(float x, float y);\n" |
| 33967 | "float2 __ovld __cnfn fmax(float2 x, float2 y);\n" |
| 33968 | "float3 __ovld __cnfn fmax(float3 x, float3 y);\n" |
| 33969 | "float4 __ovld __cnfn fmax(float4 x, float4 y);\n" |
| 33970 | "float8 __ovld __cnfn fmax(float8 x, float8 y);\n" |
| 33971 | "float16 __ovld __cnfn fmax(float16 x, float16 y);\n" |
| 33972 | "float2 __ovld __cnfn fmax(float2 x, float y);\n" |
| 33973 | "float3 __ovld __cnfn fmax(float3 x, float y);\n" |
| 33974 | "float4 __ovld __cnfn fmax(float4 x, float y);\n" |
| 33975 | "float8 __ovld __cnfn fmax(float8 x, float y);\n" |
| 33976 | "float16 __ovld __cnfn fmax(float16 x, float y);\n" |
| 33977 | "#ifdef cl_khr_fp64\n" |
| 33978 | "double __ovld __cnfn fmax(double x, double y);\n" |
| 33979 | "double2 __ovld __cnfn fmax(double2 x, double2 y);\n" |
| 33980 | "double3 __ovld __cnfn fmax(double3 x, double3 y);\n" |
| 33981 | "double4 __ovld __cnfn fmax(double4 x, double4 y);\n" |
| 33982 | "double8 __ovld __cnfn fmax(double8 x, double8 y);\n" |
| 33983 | "double16 __ovld __cnfn fmax(double16 x, double16 y);\n" |
| 33984 | "double2 __ovld __cnfn fmax(double2 x, double y);\n" |
| 33985 | "double3 __ovld __cnfn fmax(double3 x, double y);\n" |
| 33986 | "double4 __ovld __cnfn fmax(double4 x, double y);\n" |
| 33987 | "double8 __ovld __cnfn fmax(double8 x, double y);\n" |
| 33988 | "double16 __ovld __cnfn fmax(double16 x, double y);\n" |
| 33989 | "#endif //cl_khr_fp64\n" |
| 33990 | "#ifdef cl_khr_fp16\n" |
| 33991 | "half __ovld __cnfn fmax(half x, half y);\n" |
| 33992 | "half2 __ovld __cnfn fmax(half2 x, half2 y);\n" |
| 33993 | "half3 __ovld __cnfn fmax(half3 x, half3 y);\n" |
| 33994 | "half4 __ovld __cnfn fmax(half4 x, half4 y);\n" |
| 33995 | "half8 __ovld __cnfn fmax(half8 x, half8 y);\n" |
| 33996 | "half16 __ovld __cnfn fmax(half16 x, half16 y);\n" |
| 33997 | "half2 __ovld __cnfn fmax(half2 x, half y);\n" |
| 33998 | "half3 __ovld __cnfn fmax(half3 x, half y);\n" |
| 33999 | "half4 __ovld __cnfn fmax(half4 x, half y);\n" |
| 34000 | "half8 __ovld __cnfn fmax(half8 x, half y);\n" |
| 34001 | "half16 __ovld __cnfn fmax(half16 x, half y);\n" |
| 34002 | "#endif //cl_khr_fp16\n" |
| 34003 | "\n" |
| 34004 | "/**\n" |
| 34005 | " * Returns y if y < x, otherwise it returns x. If one\n" |
| 34006 | " * argument is a NaN, fmin() returns the other\n" |
| 34007 | " * argument. If both arguments are NaNs, fmin()\n" |
| 34008 | " * returns a NaN.\n" |
| 34009 | " */\n" |
| 34010 | "float __ovld __cnfn fmin(float x, float y);\n" |
| 34011 | "float2 __ovld __cnfn fmin(float2 x, float2 y);\n" |
| 34012 | "float3 __ovld __cnfn fmin(float3 x, float3 y);\n" |
| 34013 | "float4 __ovld __cnfn fmin(float4 x, float4 y);\n" |
| 34014 | "float8 __ovld __cnfn fmin(float8 x, float8 y);\n" |
| 34015 | "float16 __ovld __cnfn fmin(float16 x, float16 y);\n" |
| 34016 | "float2 __ovld __cnfn fmin(float2 x, float y);\n" |
| 34017 | "float3 __ovld __cnfn fmin(float3 x, float y);\n" |
| 34018 | "float4 __ovld __cnfn fmin(float4 x, float y);\n" |
| 34019 | "float8 __ovld __cnfn fmin(float8 x, float y);\n" |
| 34020 | "float16 __ovld __cnfn fmin(float16 x, float y);\n" |
| 34021 | "#ifdef cl_khr_fp64\n" |
| 34022 | "double __ovld __cnfn fmin(double x, double y);\n" |
| 34023 | "double2 __ovld __cnfn fmin(double2 x, double2 y);\n" |
| 34024 | "double3 __ovld __cnfn fmin(double3 x, double3 y);\n" |
| 34025 | "double4 __ovld __cnfn fmin(double4 x, double4 y);\n" |
| 34026 | "double8 __ovld __cnfn fmin(double8 x, double8 y);\n" |
| 34027 | "double16 __ovld __cnfn fmin(double16 x, double16 y);\n" |
| 34028 | "double2 __ovld __cnfn fmin(double2 x, double y);\n" |
| 34029 | "double3 __ovld __cnfn fmin(double3 x, double y);\n" |
| 34030 | "double4 __ovld __cnfn fmin(double4 x, double y);\n" |
| 34031 | "double8 __ovld __cnfn fmin(double8 x, double y);\n" |
| 34032 | "double16 __ovld __cnfn fmin(double16 x, double y);\n" |
| 34033 | "#endif //cl_khr_fp64\n" |
| 34034 | "#ifdef cl_khr_fp16\n" |
| 34035 | "half __ovld __cnfn fmin(half x, half y);\n" |
| 34036 | "half2 __ovld __cnfn fmin(half2 x, half2 y);\n" |
| 34037 | "half3 __ovld __cnfn fmin(half3 x, half3 y);\n" |
| 34038 | "half4 __ovld __cnfn fmin(half4 x, half4 y);\n" |
| 34039 | "half8 __ovld __cnfn fmin(half8 x, half8 y);\n" |
| 34040 | "half16 __ovld __cnfn fmin(half16 x, half16 y);\n" |
| 34041 | "half2 __ovld __cnfn fmin(half2 x, half y);\n" |
| 34042 | "half3 __ovld __cnfn fmin(half3 x, half y);\n" |
| 34043 | "half4 __ovld __cnfn fmin(half4 x, half y);\n" |
| 34044 | "half8 __ovld __cnfn fmin(half8 x, half y);\n" |
| 34045 | "half16 __ovld __cnfn fmin(half16 x, half y);\n" |
| 34046 | "#endif //cl_khr_fp16\n" |
| 34047 | "\n" |
| 34048 | "/**\n" |
| 34049 | " * Modulus. Returns x - y * trunc (x/y).\n" |
| 34050 | " */\n" |
| 34051 | "float __ovld __cnfn fmod(float x, float y);\n" |
| 34052 | "float2 __ovld __cnfn fmod(float2 x, float2 y);\n" |
| 34053 | "float3 __ovld __cnfn fmod(float3 x, float3 y);\n" |
| 34054 | "float4 __ovld __cnfn fmod(float4 x, float4 y);\n" |
| 34055 | "float8 __ovld __cnfn fmod(float8 x, float8 y);\n" |
| 34056 | "float16 __ovld __cnfn fmod(float16 x, float16 y);\n" |
| 34057 | "#ifdef cl_khr_fp64\n" |
| 34058 | "double __ovld __cnfn fmod(double x, double y);\n" |
| 34059 | "double2 __ovld __cnfn fmod(double2 x, double2 y);\n" |
| 34060 | "double3 __ovld __cnfn fmod(double3 x, double3 y);\n" |
| 34061 | "double4 __ovld __cnfn fmod(double4 x, double4 y);\n" |
| 34062 | "double8 __ovld __cnfn fmod(double8 x, double8 y);\n" |
| 34063 | "double16 __ovld __cnfn fmod(double16 x, double16 y);\n" |
| 34064 | "#endif //cl_khr_fp64\n" |
| 34065 | "#ifdef cl_khr_fp16\n" |
| 34066 | "half __ovld __cnfn fmod(half x, half y);\n" |
| 34067 | "half2 __ovld __cnfn fmod(half2 x, half2 y);\n" |
| 34068 | "half3 __ovld __cnfn fmod(half3 x, half3 y);\n" |
| 34069 | "half4 __ovld __cnfn fmod(half4 x, half4 y);\n" |
| 34070 | "half8 __ovld __cnfn fmod(half8 x, half8 y);\n" |
| 34071 | "half16 __ovld __cnfn fmod(half16 x, half16 y);\n" |
| 34072 | "#endif //cl_khr_fp16\n" |
| 34073 | "\n" |
| 34074 | "/**\n" |
| 34075 | " * Returns fmin(x - floor (x), 0x1.fffffep-1f ).\n" |
| 34076 | " * floor(x) is returned in iptr.\n" |
| 34077 | " */\n" |
| 34078 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 34079 | "float __ovld fract(float x, float *iptr);\n" |
| 34080 | "float2 __ovld fract(float2 x, float2 *iptr);\n" |
| 34081 | "float3 __ovld fract(float3 x, float3 *iptr);\n" |
| 34082 | "float4 __ovld fract(float4 x, float4 *iptr);\n" |
| 34083 | "float8 __ovld fract(float8 x, float8 *iptr);\n" |
| 34084 | "float16 __ovld fract(float16 x, float16 *iptr);\n" |
| 34085 | "#ifdef cl_khr_fp64\n" |
| 34086 | "double __ovld fract(double x, double *iptr);\n" |
| 34087 | "double2 __ovld fract(double2 x, double2 *iptr);\n" |
| 34088 | "double3 __ovld fract(double3 x, double3 *iptr);\n" |
| 34089 | "double4 __ovld fract(double4 x, double4 *iptr);\n" |
| 34090 | "double8 __ovld fract(double8 x, double8 *iptr);\n" |
| 34091 | "double16 __ovld fract(double16 x, double16 *iptr);\n" |
| 34092 | "#endif //cl_khr_fp64\n" |
| 34093 | "#ifdef cl_khr_fp16\n" |
| 34094 | "half __ovld fract(half x, half *iptr);\n" |
| 34095 | "half2 __ovld fract(half2 x, half2 *iptr);\n" |
| 34096 | "half3 __ovld fract(half3 x, half3 *iptr);\n" |
| 34097 | "half4 __ovld fract(half4 x, half4 *iptr);\n" |
| 34098 | "half8 __ovld fract(half8 x, half8 *iptr);\n" |
| 34099 | "half16 __ovld fract(half16 x, half16 *iptr);\n" |
| 34100 | "#endif //cl_khr_fp16\n" |
| 34101 | "#else\n" |
| 34102 | "float __ovld fract(float x, __global float *iptr);\n" |
| 34103 | "float2 __ovld fract(float2 x, __global float2 *iptr);\n" |
| 34104 | "float3 __ovld fract(float3 x, __global float3 *iptr);\n" |
| 34105 | "float4 __ovld fract(float4 x, __global float4 *iptr);\n" |
| 34106 | "float8 __ovld fract(float8 x, __global float8 *iptr);\n" |
| 34107 | "float16 __ovld fract(float16 x, __global float16 *iptr);\n" |
| 34108 | "float __ovld fract(float x, __local float *iptr);\n" |
| 34109 | "float2 __ovld fract(float2 x, __local float2 *iptr);\n" |
| 34110 | "float3 __ovld fract(float3 x, __local float3 *iptr);\n" |
| 34111 | "float4 __ovld fract(float4 x, __local float4 *iptr);\n" |
| 34112 | "float8 __ovld fract(float8 x, __local float8 *iptr);\n" |
| 34113 | "float16 __ovld fract(float16 x, __local float16 *iptr);\n" |
| 34114 | "float __ovld fract(float x, __private float *iptr);\n" |
| 34115 | "float2 __ovld fract(float2 x, __private float2 *iptr);\n" |
| 34116 | "float3 __ovld fract(float3 x, __private float3 *iptr);\n" |
| 34117 | "float4 __ovld fract(float4 x, __private float4 *iptr);\n" |
| 34118 | "float8 __ovld fract(float8 x, __private float8 *iptr);\n" |
| 34119 | "float16 __ovld fract(float16 x, __private float16 *iptr);\n" |
| 34120 | "#ifdef cl_khr_fp64\n" |
| 34121 | "double __ovld fract(double x, __global double *iptr);\n" |
| 34122 | "double2 __ovld fract(double2 x, __global double2 *iptr);\n" |
| 34123 | "double3 __ovld fract(double3 x, __global double3 *iptr);\n" |
| 34124 | "double4 __ovld fract(double4 x, __global double4 *iptr);\n" |
| 34125 | "double8 __ovld fract(double8 x, __global double8 *iptr);\n" |
| 34126 | "double16 __ovld fract(double16 x, __global double16 *iptr);\n" |
| 34127 | "double __ovld fract(double x, __local double *iptr);\n" |
| 34128 | "double2 __ovld fract(double2 x, __local double2 *iptr);\n" |
| 34129 | "double3 __ovld fract(double3 x, __local double3 *iptr);\n" |
| 34130 | "double4 __ovld fract(double4 x, __local double4 *iptr);\n" |
| 34131 | "double8 __ovld fract(double8 x, __local double8 *iptr);\n" |
| 34132 | "double16 __ovld fract(double16 x, __local double16 *iptr);\n" |
| 34133 | "double __ovld fract(double x, __private double *iptr);\n" |
| 34134 | "double2 __ovld fract(double2 x, __private double2 *iptr);\n" |
| 34135 | "double3 __ovld fract(double3 x, __private double3 *iptr);\n" |
| 34136 | "double4 __ovld fract(double4 x, __private double4 *iptr);\n" |
| 34137 | "double8 __ovld fract(double8 x, __private double8 *iptr);\n" |
| 34138 | "double16 __ovld fract(double16 x, __private double16 *iptr);\n" |
| 34139 | "#endif //cl_khr_fp64\n" |
| 34140 | "#ifdef cl_khr_fp16\n" |
| 34141 | "half __ovld fract(half x, __global half *iptr);\n" |
| 34142 | "half2 __ovld fract(half2 x, __global half2 *iptr);\n" |
| 34143 | "half3 __ovld fract(half3 x, __global half3 *iptr);\n" |
| 34144 | "half4 __ovld fract(half4 x, __global half4 *iptr);\n" |
| 34145 | "half8 __ovld fract(half8 x, __global half8 *iptr);\n" |
| 34146 | "half16 __ovld fract(half16 x, __global half16 *iptr);\n" |
| 34147 | "half __ovld fract(half x, __local half *iptr);\n" |
| 34148 | "half2 __ovld fract(half2 x, __local half2 *iptr);\n" |
| 34149 | "half3 __ovld fract(half3 x, __local half3 *iptr);\n" |
| 34150 | "half4 __ovld fract(half4 x, __local half4 *iptr);\n" |
| 34151 | "half8 __ovld fract(half8 x, __local half8 *iptr);\n" |
| 34152 | "half16 __ovld fract(half16 x, __local half16 *iptr);\n" |
| 34153 | "half __ovld fract(half x, __private half *iptr);\n" |
| 34154 | "half2 __ovld fract(half2 x, __private half2 *iptr);\n" |
| 34155 | "half3 __ovld fract(half3 x, __private half3 *iptr);\n" |
| 34156 | "half4 __ovld fract(half4 x, __private half4 *iptr);\n" |
| 34157 | "half8 __ovld fract(half8 x, __private half8 *iptr);\n" |
| 34158 | "half16 __ovld fract(half16 x, __private half16 *iptr);\n" |
| 34159 | "#endif //cl_khr_fp16\n" |
| 34160 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 34161 | "\n" |
| 34162 | "/**\n" |
| 34163 | " * Extract mantissa and exponent from x. For each\n" |
| 34164 | " * component the mantissa returned is a float with\n" |
| 34165 | " * magnitude in the interval [1/2, 1) or 0. Each\n" |
| 34166 | " * component of x equals mantissa returned * 2^exp.\n" |
| 34167 | " */\n" |
| 34168 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 34169 | "float __ovld frexp(float x, int *exp);\n" |
| 34170 | "float2 __ovld frexp(float2 x, int2 *exp);\n" |
| 34171 | "float3 __ovld frexp(float3 x, int3 *exp);\n" |
| 34172 | "float4 __ovld frexp(float4 x, int4 *exp);\n" |
| 34173 | "float8 __ovld frexp(float8 x, int8 *exp);\n" |
| 34174 | "float16 __ovld frexp(float16 x, int16 *exp);\n" |
| 34175 | "#ifdef cl_khr_fp64\n" |
| 34176 | "double __ovld frexp(double x, int *exp);\n" |
| 34177 | "double2 __ovld frexp(double2 x, int2 *exp);\n" |
| 34178 | "double3 __ovld frexp(double3 x, int3 *exp);\n" |
| 34179 | "double4 __ovld frexp(double4 x, int4 *exp);\n" |
| 34180 | "double8 __ovld frexp(double8 x, int8 *exp);\n" |
| 34181 | "double16 __ovld frexp(double16 x, int16 *exp);\n" |
| 34182 | "#endif //cl_khr_fp64\n" |
| 34183 | "#ifdef cl_khr_fp16\n" |
| 34184 | "half __ovld frexp(half x, int *exp);\n" |
| 34185 | "half2 __ovld frexp(half2 x, int2 *exp);\n" |
| 34186 | "half3 __ovld frexp(half3 x, int3 *exp);\n" |
| 34187 | "half4 __ovld frexp(half4 x, int4 *exp);\n" |
| 34188 | "half8 __ovld frexp(half8 x, int8 *exp);\n" |
| 34189 | "half16 __ovld frexp(half16 x, int16 *exp);\n" |
| 34190 | "#endif //cl_khr_fp16\n" |
| 34191 | "#else\n" |
| 34192 | "float __ovld frexp(float x, __global int *exp);\n" |
| 34193 | "float2 __ovld frexp(float2 x, __global int2 *exp);\n" |
| 34194 | "float3 __ovld frexp(float3 x, __global int3 *exp);\n" |
| 34195 | "float4 __ovld frexp(float4 x, __global int4 *exp);\n" |
| 34196 | "float8 __ovld frexp(float8 x, __global int8 *exp);\n" |
| 34197 | "float16 __ovld frexp(float16 x, __global int16 *exp);\n" |
| 34198 | "float __ovld frexp(float x, __local int *exp);\n" |
| 34199 | "float2 __ovld frexp(float2 x, __local int2 *exp);\n" |
| 34200 | "float3 __ovld frexp(float3 x, __local int3 *exp);\n" |
| 34201 | "float4 __ovld frexp(float4 x, __local int4 *exp);\n" |
| 34202 | "float8 __ovld frexp(float8 x, __local int8 *exp);\n" |
| 34203 | "float16 __ovld frexp(float16 x, __local int16 *exp);\n" |
| 34204 | "float __ovld frexp(float x, __private int *exp);\n" |
| 34205 | "float2 __ovld frexp(float2 x, __private int2 *exp);\n" |
| 34206 | "float3 __ovld frexp(float3 x, __private int3 *exp);\n" |
| 34207 | "float4 __ovld frexp(float4 x, __private int4 *exp);\n" |
| 34208 | "float8 __ovld frexp(float8 x, __private int8 *exp);\n" |
| 34209 | "float16 __ovld frexp(float16 x, __private int16 *exp);\n" |
| 34210 | "#ifdef cl_khr_fp64\n" |
| 34211 | "double __ovld frexp(double x, __global int *exp);\n" |
| 34212 | "double2 __ovld frexp(double2 x, __global int2 *exp);\n" |
| 34213 | "double3 __ovld frexp(double3 x, __global int3 *exp);\n" |
| 34214 | "double4 __ovld frexp(double4 x, __global int4 *exp);\n" |
| 34215 | "double8 __ovld frexp(double8 x, __global int8 *exp);\n" |
| 34216 | "double16 __ovld frexp(double16 x, __global int16 *exp);\n" |
| 34217 | "double __ovld frexp(double x, __local int *exp);\n" |
| 34218 | "double2 __ovld frexp(double2 x, __local int2 *exp);\n" |
| 34219 | "double3 __ovld frexp(double3 x, __local int3 *exp);\n" |
| 34220 | "double4 __ovld frexp(double4 x, __local int4 *exp);\n" |
| 34221 | "double8 __ovld frexp(double8 x, __local int8 *exp);\n" |
| 34222 | "double16 __ovld frexp(double16 x, __local int16 *exp);\n" |
| 34223 | "double __ovld frexp(double x, __private int *exp);\n" |
| 34224 | "double2 __ovld frexp(double2 x, __private int2 *exp);\n" |
| 34225 | "double3 __ovld frexp(double3 x, __private int3 *exp);\n" |
| 34226 | "double4 __ovld frexp(double4 x, __private int4 *exp);\n" |
| 34227 | "double8 __ovld frexp(double8 x, __private int8 *exp);\n" |
| 34228 | "double16 __ovld frexp(double16 x, __private int16 *exp);\n" |
| 34229 | "#endif //cl_khr_fp64\n" |
| 34230 | "#ifdef cl_khr_fp16\n" |
| 34231 | "half __ovld frexp(half x, __global int *exp);\n" |
| 34232 | "half2 __ovld frexp(half2 x, __global int2 *exp);\n" |
| 34233 | "half3 __ovld frexp(half3 x, __global int3 *exp);\n" |
| 34234 | "half4 __ovld frexp(half4 x, __global int4 *exp);\n" |
| 34235 | "half8 __ovld frexp(half8 x, __global int8 *exp);\n" |
| 34236 | "half16 __ovld frexp(half16 x, __global int16 *exp);\n" |
| 34237 | "half __ovld frexp(half x, __local int *exp);\n" |
| 34238 | "half2 __ovld frexp(half2 x, __local int2 *exp);\n" |
| 34239 | "half3 __ovld frexp(half3 x, __local int3 *exp);\n" |
| 34240 | "half4 __ovld frexp(half4 x, __local int4 *exp);\n" |
| 34241 | "half8 __ovld frexp(half8 x, __local int8 *exp);\n" |
| 34242 | "half16 __ovld frexp(half16 x, __local int16 *exp);\n" |
| 34243 | "half __ovld frexp(half x, __private int *exp);\n" |
| 34244 | "half2 __ovld frexp(half2 x, __private int2 *exp);\n" |
| 34245 | "half3 __ovld frexp(half3 x, __private int3 *exp);\n" |
| 34246 | "half4 __ovld frexp(half4 x, __private int4 *exp);\n" |
| 34247 | "half8 __ovld frexp(half8 x, __private int8 *exp);\n" |
| 34248 | "half16 __ovld frexp(half16 x, __private int16 *exp);\n" |
| 34249 | "#endif //cl_khr_fp16\n" |
| 34250 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 34251 | "\n" |
| 34252 | "/**\n" |
| 34253 | " * Compute the value of the square root of x^2 + y^2\n" |
| 34254 | " * without undue overflow or underflow.\n" |
| 34255 | " */\n" |
| 34256 | "float __ovld __cnfn hypot(float x, float y);\n" |
| 34257 | "float2 __ovld __cnfn hypot(float2 x, float2 y);\n" |
| 34258 | "float3 __ovld __cnfn hypot(float3 x, float3 y);\n" |
| 34259 | "float4 __ovld __cnfn hypot(float4 x, float4 y);\n" |
| 34260 | "float8 __ovld __cnfn hypot(float8 x, float8 y);\n" |
| 34261 | "float16 __ovld __cnfn hypot(float16 x, float16 y);\n" |
| 34262 | "#ifdef cl_khr_fp64\n" |
| 34263 | "double __ovld __cnfn hypot(double x, double y);\n" |
| 34264 | "double2 __ovld __cnfn hypot(double2 x, double2 y);\n" |
| 34265 | "double3 __ovld __cnfn hypot(double3 x, double3 y);\n" |
| 34266 | "double4 __ovld __cnfn hypot(double4 x, double4 y);\n" |
| 34267 | "double8 __ovld __cnfn hypot(double8 x, double8 y);\n" |
| 34268 | "double16 __ovld __cnfn hypot(double16 x, double16 y);\n" |
| 34269 | "#endif //cl_khr_fp64\n" |
| 34270 | "#ifdef cl_khr_fp16\n" |
| 34271 | "half __ovld __cnfn hypot(half x, half y);\n" |
| 34272 | "half2 __ovld __cnfn hypot(half2 x, half2 y);\n" |
| 34273 | "half3 __ovld __cnfn hypot(half3 x, half3 y);\n" |
| 34274 | "half4 __ovld __cnfn hypot(half4 x, half4 y);\n" |
| 34275 | "half8 __ovld __cnfn hypot(half8 x, half8 y);\n" |
| 34276 | "half16 __ovld __cnfn hypot(half16 x, half16 y);\n" |
| 34277 | "#endif //cl_khr_fp16\n" |
| 34278 | "\n" |
| 34279 | "/**\n" |
| 34280 | " * Return the exponent as an integer value.\n" |
| 34281 | " */\n" |
| 34282 | "int __ovld __cnfn ilogb(float x);\n" |
| 34283 | "int2 __ovld __cnfn ilogb(float2 x);\n" |
| 34284 | "int3 __ovld __cnfn ilogb(float3 x);\n" |
| 34285 | "int4 __ovld __cnfn ilogb(float4 x);\n" |
| 34286 | "int8 __ovld __cnfn ilogb(float8 x);\n" |
| 34287 | "int16 __ovld __cnfn ilogb(float16 x);\n" |
| 34288 | "#ifdef cl_khr_fp64\n" |
| 34289 | "int __ovld __cnfn ilogb(double x);\n" |
| 34290 | "int2 __ovld __cnfn ilogb(double2 x);\n" |
| 34291 | "int3 __ovld __cnfn ilogb(double3 x);\n" |
| 34292 | "int4 __ovld __cnfn ilogb(double4 x);\n" |
| 34293 | "int8 __ovld __cnfn ilogb(double8 x);\n" |
| 34294 | "int16 __ovld __cnfn ilogb(double16 x);\n" |
| 34295 | "#endif //cl_khr_fp64\n" |
| 34296 | "#ifdef cl_khr_fp16\n" |
| 34297 | "int __ovld __cnfn ilogb(half x);\n" |
| 34298 | "int2 __ovld __cnfn ilogb(half2 x);\n" |
| 34299 | "int3 __ovld __cnfn ilogb(half3 x);\n" |
| 34300 | "int4 __ovld __cnfn ilogb(half4 x);\n" |
| 34301 | "int8 __ovld __cnfn ilogb(half8 x);\n" |
| 34302 | "int16 __ovld __cnfn ilogb(half16 x);\n" |
| 34303 | "#endif //cl_khr_fp16\n" |
| 34304 | "\n" |
| 34305 | "/**\n" |
| 34306 | " * Multiply x by 2 to the power n.\n" |
| 34307 | " */\n" |
| 34308 | "float __ovld __cnfn ldexp(float x, int n);\n" |
| 34309 | "float2 __ovld __cnfn ldexp(float2 x, int2 n);\n" |
| 34310 | "float3 __ovld __cnfn ldexp(float3 x, int3 n);\n" |
| 34311 | "float4 __ovld __cnfn ldexp(float4 x, int4 n);\n" |
| 34312 | "float8 __ovld __cnfn ldexp(float8 x, int8 n);\n" |
| 34313 | "float16 __ovld __cnfn ldexp(float16 x, int16 n);\n" |
| 34314 | "float2 __ovld __cnfn ldexp(float2 x, int n);\n" |
| 34315 | "float3 __ovld __cnfn ldexp(float3 x, int n);\n" |
| 34316 | "float4 __ovld __cnfn ldexp(float4 x, int n);\n" |
| 34317 | "float8 __ovld __cnfn ldexp(float8 x, int n);\n" |
| 34318 | "float16 __ovld __cnfn ldexp(float16 x, int n);\n" |
| 34319 | "#ifdef cl_khr_fp64\n" |
| 34320 | "double __ovld __cnfn ldexp(double x, int n);\n" |
| 34321 | "double2 __ovld __cnfn ldexp(double2 x, int2 n);\n" |
| 34322 | "double3 __ovld __cnfn ldexp(double3 x, int3 n);\n" |
| 34323 | "double4 __ovld __cnfn ldexp(double4 x, int4 n);\n" |
| 34324 | "double8 __ovld __cnfn ldexp(double8 x, int8 n);\n" |
| 34325 | "double16 __ovld __cnfn ldexp(double16 x, int16 n);\n" |
| 34326 | "double2 __ovld __cnfn ldexp(double2 x, int n);\n" |
| 34327 | "double3 __ovld __cnfn ldexp(double3 x, int n);\n" |
| 34328 | "double4 __ovld __cnfn ldexp(double4 x, int n);\n" |
| 34329 | "double8 __ovld __cnfn ldexp(double8 x, int n);\n" |
| 34330 | "double16 __ovld __cnfn ldexp(double16 x, int n);\n" |
| 34331 | "#endif //cl_khr_fp64\n" |
| 34332 | "#ifdef cl_khr_fp16\n" |
| 34333 | "half __ovld __cnfn ldexp(half x, int n);\n" |
| 34334 | "half2 __ovld __cnfn ldexp(half2 x, int2 n);\n" |
| 34335 | "half3 __ovld __cnfn ldexp(half3 x, int3 n);\n" |
| 34336 | "half4 __ovld __cnfn ldexp(half4 x, int4 n);\n" |
| 34337 | "half8 __ovld __cnfn ldexp(half8 x, int8 n);\n" |
| 34338 | "half16 __ovld __cnfn ldexp(half16 x, int16 n);\n" |
| 34339 | "half2 __ovld __cnfn ldexp(half2 x, int n);\n" |
| 34340 | "half3 __ovld __cnfn ldexp(half3 x, int n);\n" |
| 34341 | "half4 __ovld __cnfn ldexp(half4 x, int n);\n" |
| 34342 | "half8 __ovld __cnfn ldexp(half8 x, int n);\n" |
| 34343 | "half16 __ovld __cnfn ldexp(half16 x, int n);\n" |
| 34344 | "#endif //cl_khr_fp16\n" |
| 34345 | "\n" |
| 34346 | "/**\n" |
| 34347 | " * Log gamma function. Returns the natural\n" |
| 34348 | " * logarithm of the absolute value of the gamma\n" |
| 34349 | " * function. The sign of the gamma function is\n" |
| 34350 | " * returned in the signp argument of lgamma_r.\n" |
| 34351 | " */\n" |
| 34352 | "float __ovld __cnfn lgamma(float x);\n" |
| 34353 | "float2 __ovld __cnfn lgamma(float2 x);\n" |
| 34354 | "float3 __ovld __cnfn lgamma(float3 x);\n" |
| 34355 | "float4 __ovld __cnfn lgamma(float4 x);\n" |
| 34356 | "float8 __ovld __cnfn lgamma(float8 x);\n" |
| 34357 | "float16 __ovld __cnfn lgamma(float16 x);\n" |
| 34358 | "#ifdef cl_khr_fp64\n" |
| 34359 | "double __ovld __cnfn lgamma(double x);\n" |
| 34360 | "double2 __ovld __cnfn lgamma(double2 x);\n" |
| 34361 | "double3 __ovld __cnfn lgamma(double3 x);\n" |
| 34362 | "double4 __ovld __cnfn lgamma(double4 x);\n" |
| 34363 | "double8 __ovld __cnfn lgamma(double8 x);\n" |
| 34364 | "double16 __ovld __cnfn lgamma(double16 x);\n" |
| 34365 | "#endif //cl_khr_fp64\n" |
| 34366 | "#ifdef cl_khr_fp16\n" |
| 34367 | "half __ovld __cnfn lgamma(half x);\n" |
| 34368 | "half2 __ovld __cnfn lgamma(half2 x);\n" |
| 34369 | "half3 __ovld __cnfn lgamma(half3 x);\n" |
| 34370 | "half4 __ovld __cnfn lgamma(half4 x);\n" |
| 34371 | "half8 __ovld __cnfn lgamma(half8 x);\n" |
| 34372 | "half16 __ovld __cnfn lgamma(half16 x);\n" |
| 34373 | "#endif //cl_khr_fp16\n" |
| 34374 | "\n" |
| 34375 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 34376 | "float __ovld lgamma_r(float x, int *signp);\n" |
| 34377 | "float2 __ovld lgamma_r(float2 x, int2 *signp);\n" |
| 34378 | "float3 __ovld lgamma_r(float3 x, int3 *signp);\n" |
| 34379 | "float4 __ovld lgamma_r(float4 x, int4 *signp);\n" |
| 34380 | "float8 __ovld lgamma_r(float8 x, int8 *signp);\n" |
| 34381 | "float16 __ovld lgamma_r(float16 x, int16 *signp);\n" |
| 34382 | "#ifdef cl_khr_fp64\n" |
| 34383 | "double __ovld lgamma_r(double x, int *signp);\n" |
| 34384 | "double2 __ovld lgamma_r(double2 x, int2 *signp);\n" |
| 34385 | "double3 __ovld lgamma_r(double3 x, int3 *signp);\n" |
| 34386 | "double4 __ovld lgamma_r(double4 x, int4 *signp);\n" |
| 34387 | "double8 __ovld lgamma_r(double8 x, int8 *signp);\n" |
| 34388 | "double16 __ovld lgamma_r(double16 x, int16 *signp);\n" |
| 34389 | "#endif //cl_khr_fp64\n" |
| 34390 | "#ifdef cl_khr_fp16\n" |
| 34391 | "half __ovld lgamma_r(half x, int *signp);\n" |
| 34392 | "half2 __ovld lgamma_r(half2 x, int2 *signp);\n" |
| 34393 | "half3 __ovld lgamma_r(half3 x, int3 *signp);\n" |
| 34394 | "half4 __ovld lgamma_r(half4 x, int4 *signp);\n" |
| 34395 | "half8 __ovld lgamma_r(half8 x, int8 *signp);\n" |
| 34396 | "half16 __ovld lgamma_r(half16 x, int16 *signp);\n" |
| 34397 | "#endif //cl_khr_fp16\n" |
| 34398 | "#else\n" |
| 34399 | "float __ovld lgamma_r(float x, __global int *signp);\n" |
| 34400 | "float2 __ovld lgamma_r(float2 x, __global int2 *signp);\n" |
| 34401 | "float3 __ovld lgamma_r(float3 x, __global int3 *signp);\n" |
| 34402 | "float4 __ovld lgamma_r(float4 x, __global int4 *signp);\n" |
| 34403 | "float8 __ovld lgamma_r(float8 x, __global int8 *signp);\n" |
| 34404 | "float16 __ovld lgamma_r(float16 x, __global int16 *signp);\n" |
| 34405 | "float __ovld lgamma_r(float x, __local int *signp);\n" |
| 34406 | "float2 __ovld lgamma_r(float2 x, __local int2 *signp);\n" |
| 34407 | "float3 __ovld lgamma_r(float3 x, __local int3 *signp);\n" |
| 34408 | "float4 __ovld lgamma_r(float4 x, __local int4 *signp);\n" |
| 34409 | "float8 __ovld lgamma_r(float8 x, __local int8 *signp);\n" |
| 34410 | "float16 __ovld lgamma_r(float16 x, __local int16 *signp);\n" |
| 34411 | "float __ovld lgamma_r(float x, __private int *signp);\n" |
| 34412 | "float2 __ovld lgamma_r(float2 x, __private int2 *signp);\n" |
| 34413 | "float3 __ovld lgamma_r(float3 x, __private int3 *signp);\n" |
| 34414 | "float4 __ovld lgamma_r(float4 x, __private int4 *signp);\n" |
| 34415 | "float8 __ovld lgamma_r(float8 x, __private int8 *signp);\n" |
| 34416 | "float16 __ovld lgamma_r(float16 x, __private int16 *signp);\n" |
| 34417 | "#ifdef cl_khr_fp64\n" |
| 34418 | "double __ovld lgamma_r(double x, __global int *signp);\n" |
| 34419 | "double2 __ovld lgamma_r(double2 x, __global int2 *signp);\n" |
| 34420 | "double3 __ovld lgamma_r(double3 x, __global int3 *signp);\n" |
| 34421 | "double4 __ovld lgamma_r(double4 x, __global int4 *signp);\n" |
| 34422 | "double8 __ovld lgamma_r(double8 x, __global int8 *signp);\n" |
| 34423 | "double16 __ovld lgamma_r(double16 x, __global int16 *signp);\n" |
| 34424 | "double __ovld lgamma_r(double x, __local int *signp);\n" |
| 34425 | "double2 __ovld lgamma_r(double2 x, __local int2 *signp);\n" |
| 34426 | "double3 __ovld lgamma_r(double3 x, __local int3 *signp);\n" |
| 34427 | "double4 __ovld lgamma_r(double4 x, __local int4 *signp);\n" |
| 34428 | "double8 __ovld lgamma_r(double8 x, __local int8 *signp);\n" |
| 34429 | "double16 __ovld lgamma_r(double16 x, __local int16 *signp);\n" |
| 34430 | "double __ovld lgamma_r(double x, __private int *signp);\n" |
| 34431 | "double2 __ovld lgamma_r(double2 x, __private int2 *signp);\n" |
| 34432 | "double3 __ovld lgamma_r(double3 x, __private int3 *signp);\n" |
| 34433 | "double4 __ovld lgamma_r(double4 x, __private int4 *signp);\n" |
| 34434 | "double8 __ovld lgamma_r(double8 x, __private int8 *signp);\n" |
| 34435 | "double16 __ovld lgamma_r(double16 x, __private int16 *signp);\n" |
| 34436 | "#endif //cl_khr_fp64\n" |
| 34437 | "#ifdef cl_khr_fp16\n" |
| 34438 | "half __ovld lgamma_r(half x, __global int *signp);\n" |
| 34439 | "half2 __ovld lgamma_r(half2 x, __global int2 *signp);\n" |
| 34440 | "half3 __ovld lgamma_r(half3 x, __global int3 *signp);\n" |
| 34441 | "half4 __ovld lgamma_r(half4 x, __global int4 *signp);\n" |
| 34442 | "half8 __ovld lgamma_r(half8 x, __global int8 *signp);\n" |
| 34443 | "half16 __ovld lgamma_r(half16 x, __global int16 *signp);\n" |
| 34444 | "half __ovld lgamma_r(half x, __local int *signp);\n" |
| 34445 | "half2 __ovld lgamma_r(half2 x, __local int2 *signp);\n" |
| 34446 | "half3 __ovld lgamma_r(half3 x, __local int3 *signp);\n" |
| 34447 | "half4 __ovld lgamma_r(half4 x, __local int4 *signp);\n" |
| 34448 | "half8 __ovld lgamma_r(half8 x, __local int8 *signp);\n" |
| 34449 | "half16 __ovld lgamma_r(half16 x, __local int16 *signp);\n" |
| 34450 | "half __ovld lgamma_r(half x, __private int *signp);\n" |
| 34451 | "half2 __ovld lgamma_r(half2 x, __private int2 *signp);\n" |
| 34452 | "half3 __ovld lgamma_r(half3 x, __private int3 *signp);\n" |
| 34453 | "half4 __ovld lgamma_r(half4 x, __private int4 *signp);\n" |
| 34454 | "half8 __ovld lgamma_r(half8 x, __private int8 *signp);\n" |
| 34455 | "half16 __ovld lgamma_r(half16 x, __private int16 *signp);\n" |
| 34456 | "#endif //cl_khr_fp16\n" |
| 34457 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 34458 | "\n" |
| 34459 | "/**\n" |
| 34460 | " * Compute natural logarithm.\n" |
| 34461 | " */\n" |
| 34462 | "float __ovld __cnfn log(float);\n" |
| 34463 | "float2 __ovld __cnfn log(float2);\n" |
| 34464 | "float3 __ovld __cnfn log(float3);\n" |
| 34465 | "float4 __ovld __cnfn log(float4);\n" |
| 34466 | "float8 __ovld __cnfn log(float8);\n" |
| 34467 | "float16 __ovld __cnfn log(float16);\n" |
| 34468 | "#ifdef cl_khr_fp64\n" |
| 34469 | "double __ovld __cnfn log(double);\n" |
| 34470 | "double2 __ovld __cnfn log(double2);\n" |
| 34471 | "double3 __ovld __cnfn log(double3);\n" |
| 34472 | "double4 __ovld __cnfn log(double4);\n" |
| 34473 | "double8 __ovld __cnfn log(double8);\n" |
| 34474 | "double16 __ovld __cnfn log(double16);\n" |
| 34475 | "#endif //cl_khr_fp64\n" |
| 34476 | "#ifdef cl_khr_fp16\n" |
| 34477 | "half __ovld __cnfn log(half);\n" |
| 34478 | "half2 __ovld __cnfn log(half2);\n" |
| 34479 | "half3 __ovld __cnfn log(half3);\n" |
| 34480 | "half4 __ovld __cnfn log(half4);\n" |
| 34481 | "half8 __ovld __cnfn log(half8);\n" |
| 34482 | "half16 __ovld __cnfn log(half16);\n" |
| 34483 | "#endif //cl_khr_fp16\n" |
| 34484 | "\n" |
| 34485 | "/**\n" |
| 34486 | " * Compute a base 2 logarithm.\n" |
| 34487 | " */\n" |
| 34488 | "float __ovld __cnfn log2(float);\n" |
| 34489 | "float2 __ovld __cnfn log2(float2);\n" |
| 34490 | "float3 __ovld __cnfn log2(float3);\n" |
| 34491 | "float4 __ovld __cnfn log2(float4);\n" |
| 34492 | "float8 __ovld __cnfn log2(float8);\n" |
| 34493 | "float16 __ovld __cnfn log2(float16);\n" |
| 34494 | "#ifdef cl_khr_fp64\n" |
| 34495 | "double __ovld __cnfn log2(double);\n" |
| 34496 | "double2 __ovld __cnfn log2(double2);\n" |
| 34497 | "double3 __ovld __cnfn log2(double3);\n" |
| 34498 | "double4 __ovld __cnfn log2(double4);\n" |
| 34499 | "double8 __ovld __cnfn log2(double8);\n" |
| 34500 | "double16 __ovld __cnfn log2(double16);\n" |
| 34501 | "#endif //cl_khr_fp64\n" |
| 34502 | "#ifdef cl_khr_fp16\n" |
| 34503 | "half __ovld __cnfn log2(half);\n" |
| 34504 | "half2 __ovld __cnfn log2(half2);\n" |
| 34505 | "half3 __ovld __cnfn log2(half3);\n" |
| 34506 | "half4 __ovld __cnfn log2(half4);\n" |
| 34507 | "half8 __ovld __cnfn log2(half8);\n" |
| 34508 | "half16 __ovld __cnfn log2(half16);\n" |
| 34509 | "#endif //cl_khr_fp16\n" |
| 34510 | "\n" |
| 34511 | "/**\n" |
| 34512 | " * Compute a base 10 logarithm.\n" |
| 34513 | " */\n" |
| 34514 | "float __ovld __cnfn log10(float);\n" |
| 34515 | "float2 __ovld __cnfn log10(float2);\n" |
| 34516 | "float3 __ovld __cnfn log10(float3);\n" |
| 34517 | "float4 __ovld __cnfn log10(float4);\n" |
| 34518 | "float8 __ovld __cnfn log10(float8);\n" |
| 34519 | "float16 __ovld __cnfn log10(float16);\n" |
| 34520 | "#ifdef cl_khr_fp64\n" |
| 34521 | "double __ovld __cnfn log10(double);\n" |
| 34522 | "double2 __ovld __cnfn log10(double2);\n" |
| 34523 | "double3 __ovld __cnfn log10(double3);\n" |
| 34524 | "double4 __ovld __cnfn log10(double4);\n" |
| 34525 | "double8 __ovld __cnfn log10(double8);\n" |
| 34526 | "double16 __ovld __cnfn log10(double16);\n" |
| 34527 | "#endif //cl_khr_fp64\n" |
| 34528 | "#ifdef cl_khr_fp16\n" |
| 34529 | "half __ovld __cnfn log10(half);\n" |
| 34530 | "half2 __ovld __cnfn log10(half2);\n" |
| 34531 | "half3 __ovld __cnfn log10(half3);\n" |
| 34532 | "half4 __ovld __cnfn log10(half4);\n" |
| 34533 | "half8 __ovld __cnfn log10(half8);\n" |
| 34534 | "half16 __ovld __cnfn log10(half16);\n" |
| 34535 | "#endif //cl_khr_fp16\n" |
| 34536 | "\n" |
| 34537 | "/**\n" |
| 34538 | " * Compute a base e logarithm of (1.0 + x).\n" |
| 34539 | " */\n" |
| 34540 | "float __ovld __cnfn log1p(float x);\n" |
| 34541 | "float2 __ovld __cnfn log1p(float2 x);\n" |
| 34542 | "float3 __ovld __cnfn log1p(float3 x);\n" |
| 34543 | "float4 __ovld __cnfn log1p(float4 x);\n" |
| 34544 | "float8 __ovld __cnfn log1p(float8 x);\n" |
| 34545 | "float16 __ovld __cnfn log1p(float16 x);\n" |
| 34546 | "#ifdef cl_khr_fp64\n" |
| 34547 | "double __ovld __cnfn log1p(double x);\n" |
| 34548 | "double2 __ovld __cnfn log1p(double2 x);\n" |
| 34549 | "double3 __ovld __cnfn log1p(double3 x);\n" |
| 34550 | "double4 __ovld __cnfn log1p(double4 x);\n" |
| 34551 | "double8 __ovld __cnfn log1p(double8 x);\n" |
| 34552 | "double16 __ovld __cnfn log1p(double16 x);\n" |
| 34553 | "#endif //cl_khr_fp64\n" |
| 34554 | "#ifdef cl_khr_fp16\n" |
| 34555 | "half __ovld __cnfn log1p(half x);\n" |
| 34556 | "half2 __ovld __cnfn log1p(half2 x);\n" |
| 34557 | "half3 __ovld __cnfn log1p(half3 x);\n" |
| 34558 | "half4 __ovld __cnfn log1p(half4 x);\n" |
| 34559 | "half8 __ovld __cnfn log1p(half8 x);\n" |
| 34560 | "half16 __ovld __cnfn log1p(half16 x);\n" |
| 34561 | "#endif //cl_khr_fp16\n" |
| 34562 | "\n" |
| 34563 | "/**\n" |
| 34564 | " * Compute the exponent of x, which is the integral\n" |
| 34565 | " * part of logr | x |.\n" |
| 34566 | " */\n" |
| 34567 | "float __ovld __cnfn logb(float x);\n" |
| 34568 | "float2 __ovld __cnfn logb(float2 x);\n" |
| 34569 | "float3 __ovld __cnfn logb(float3 x);\n" |
| 34570 | "float4 __ovld __cnfn logb(float4 x);\n" |
| 34571 | "float8 __ovld __cnfn logb(float8 x);\n" |
| 34572 | "float16 __ovld __cnfn logb(float16 x);\n" |
| 34573 | "#ifdef cl_khr_fp64\n" |
| 34574 | "double __ovld __cnfn logb(double x);\n" |
| 34575 | "double2 __ovld __cnfn logb(double2 x);\n" |
| 34576 | "double3 __ovld __cnfn logb(double3 x);\n" |
| 34577 | "double4 __ovld __cnfn logb(double4 x);\n" |
| 34578 | "double8 __ovld __cnfn logb(double8 x);\n" |
| 34579 | "double16 __ovld __cnfn logb(double16 x);\n" |
| 34580 | "#endif //cl_khr_fp64\n" |
| 34581 | "#ifdef cl_khr_fp16\n" |
| 34582 | "half __ovld __cnfn logb(half x);\n" |
| 34583 | "half2 __ovld __cnfn logb(half2 x);\n" |
| 34584 | "half3 __ovld __cnfn logb(half3 x);\n" |
| 34585 | "half4 __ovld __cnfn logb(half4 x);\n" |
| 34586 | "half8 __ovld __cnfn logb(half8 x);\n" |
| 34587 | "half16 __ovld __cnfn logb(half16 x);\n" |
| 34588 | "#endif //cl_khr_fp16\n" |
| 34589 | "\n" |
| 34590 | "/**\n" |
| 34591 | " * mad approximates a * b + c. Whether or how the\n" |
| 34592 | " * product of a * b is rounded and how supernormal or\n" |
| 34593 | " * subnormal intermediate products are handled is not\n" |
| 34594 | " * defined. mad is intended to be used where speed is\n" |
| 34595 | " * preferred over accuracy.\n" |
| 34596 | " */\n" |
| 34597 | "float __ovld __cnfn mad(float a, float b, float c);\n" |
| 34598 | "float2 __ovld __cnfn mad(float2 a, float2 b, float2 c);\n" |
| 34599 | "float3 __ovld __cnfn mad(float3 a, float3 b, float3 c);\n" |
| 34600 | "float4 __ovld __cnfn mad(float4 a, float4 b, float4 c);\n" |
| 34601 | "float8 __ovld __cnfn mad(float8 a, float8 b, float8 c);\n" |
| 34602 | "float16 __ovld __cnfn mad(float16 a, float16 b, float16 c);\n" |
| 34603 | "#ifdef cl_khr_fp64\n" |
| 34604 | "double __ovld __cnfn mad(double a, double b, double c);\n" |
| 34605 | "double2 __ovld __cnfn mad(double2 a, double2 b, double2 c);\n" |
| 34606 | "double3 __ovld __cnfn mad(double3 a, double3 b, double3 c);\n" |
| 34607 | "double4 __ovld __cnfn mad(double4 a, double4 b, double4 c);\n" |
| 34608 | "double8 __ovld __cnfn mad(double8 a, double8 b, double8 c);\n" |
| 34609 | "double16 __ovld __cnfn mad(double16 a, double16 b, double16 c);\n" |
| 34610 | "#endif //cl_khr_fp64\n" |
| 34611 | "#ifdef cl_khr_fp16\n" |
| 34612 | "half __ovld __cnfn mad(half a, half b, half c);\n" |
| 34613 | "half2 __ovld __cnfn mad(half2 a, half2 b, half2 c);\n" |
| 34614 | "half3 __ovld __cnfn mad(half3 a, half3 b, half3 c);\n" |
| 34615 | "half4 __ovld __cnfn mad(half4 a, half4 b, half4 c);\n" |
| 34616 | "half8 __ovld __cnfn mad(half8 a, half8 b, half8 c);\n" |
| 34617 | "half16 __ovld __cnfn mad(half16 a, half16 b, half16 c);\n" |
| 34618 | "#endif //cl_khr_fp16\n" |
| 34619 | "\n" |
| 34620 | "/**\n" |
| 34621 | " * Returns x if | x | > | y |, y if | y | > | x |, otherwise\n" |
| 34622 | " * fmax(x, y).\n" |
| 34623 | " */\n" |
| 34624 | "float __ovld __cnfn maxmag(float x, float y);\n" |
| 34625 | "float2 __ovld __cnfn maxmag(float2 x, float2 y);\n" |
| 34626 | "float3 __ovld __cnfn maxmag(float3 x, float3 y);\n" |
| 34627 | "float4 __ovld __cnfn maxmag(float4 x, float4 y);\n" |
| 34628 | "float8 __ovld __cnfn maxmag(float8 x, float8 y);\n" |
| 34629 | "float16 __ovld __cnfn maxmag(float16 x, float16 y);\n" |
| 34630 | "#ifdef cl_khr_fp64\n" |
| 34631 | "double __ovld __cnfn maxmag(double x, double y);\n" |
| 34632 | "double2 __ovld __cnfn maxmag(double2 x, double2 y);\n" |
| 34633 | "double3 __ovld __cnfn maxmag(double3 x, double3 y);\n" |
| 34634 | "double4 __ovld __cnfn maxmag(double4 x, double4 y);\n" |
| 34635 | "double8 __ovld __cnfn maxmag(double8 x, double8 y);\n" |
| 34636 | "double16 __ovld __cnfn maxmag(double16 x, double16 y);\n" |
| 34637 | "#endif //cl_khr_fp64\n" |
| 34638 | "#ifdef cl_khr_fp16\n" |
| 34639 | "half __ovld __cnfn maxmag(half x, half y);\n" |
| 34640 | "half2 __ovld __cnfn maxmag(half2 x, half2 y);\n" |
| 34641 | "half3 __ovld __cnfn maxmag(half3 x, half3 y);\n" |
| 34642 | "half4 __ovld __cnfn maxmag(half4 x, half4 y);\n" |
| 34643 | "half8 __ovld __cnfn maxmag(half8 x, half8 y);\n" |
| 34644 | "half16 __ovld __cnfn maxmag(half16 x, half16 y);\n" |
| 34645 | "#endif //cl_khr_fp16\n" |
| 34646 | "\n" |
| 34647 | "/**\n" |
| 34648 | " * Returns x if | x | < | y |, y if | y | < | x |, otherwise\n" |
| 34649 | " * fmin(x, y).\n" |
| 34650 | " */\n" |
| 34651 | "float __ovld __cnfn minmag(float x, float y);\n" |
| 34652 | "float2 __ovld __cnfn minmag(float2 x, float2 y);\n" |
| 34653 | "float3 __ovld __cnfn minmag(float3 x, float3 y);\n" |
| 34654 | "float4 __ovld __cnfn minmag(float4 x, float4 y);\n" |
| 34655 | "float8 __ovld __cnfn minmag(float8 x, float8 y);\n" |
| 34656 | "float16 __ovld __cnfn minmag(float16 x, float16 y);\n" |
| 34657 | "#ifdef cl_khr_fp64\n" |
| 34658 | "double __ovld __cnfn minmag(double x, double y);\n" |
| 34659 | "double2 __ovld __cnfn minmag(double2 x, double2 y);\n" |
| 34660 | "double3 __ovld __cnfn minmag(double3 x, double3 y);\n" |
| 34661 | "double4 __ovld __cnfn minmag(double4 x, double4 y);\n" |
| 34662 | "double8 __ovld __cnfn minmag(double8 x, double8 y);\n" |
| 34663 | "double16 __ovld __cnfn minmag(double16 x, double16 y);\n" |
| 34664 | "#endif //cl_khr_fp64\n" |
| 34665 | "#ifdef cl_khr_fp16\n" |
| 34666 | "half __ovld __cnfn minmag(half x, half y);\n" |
| 34667 | "half2 __ovld __cnfn minmag(half2 x, half2 y);\n" |
| 34668 | "half3 __ovld __cnfn minmag(half3 x, half3 y);\n" |
| 34669 | "half4 __ovld __cnfn minmag(half4 x, half4 y);\n" |
| 34670 | "half8 __ovld __cnfn minmag(half8 x, half8 y);\n" |
| 34671 | "half16 __ovld __cnfn minmag(half16 x, half16 y);\n" |
| 34672 | "#endif //cl_khr_fp16\n" |
| 34673 | "\n" |
| 34674 | "/**\n" |
| 34675 | " * Decompose a floating-point number. The modf\n" |
| 34676 | " * function breaks the argument x into integral and\n" |
| 34677 | " * fractional parts, each of which has the same sign as\n" |
| 34678 | " * the argument. It stores the integral part in the object\n" |
| 34679 | " * pointed to by iptr.\n" |
| 34680 | " */\n" |
| 34681 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 34682 | "float __ovld modf(float x, float *iptr);\n" |
| 34683 | "float2 __ovld modf(float2 x, float2 *iptr);\n" |
| 34684 | "float3 __ovld modf(float3 x, float3 *iptr);\n" |
| 34685 | "float4 __ovld modf(float4 x, float4 *iptr);\n" |
| 34686 | "float8 __ovld modf(float8 x, float8 *iptr);\n" |
| 34687 | "float16 __ovld modf(float16 x, float16 *iptr);\n" |
| 34688 | "#ifdef cl_khr_fp64\n" |
| 34689 | "double __ovld modf(double x, double *iptr);\n" |
| 34690 | "double2 __ovld modf(double2 x, double2 *iptr);\n" |
| 34691 | "double3 __ovld modf(double3 x, double3 *iptr);\n" |
| 34692 | "double4 __ovld modf(double4 x, double4 *iptr);\n" |
| 34693 | "double8 __ovld modf(double8 x, double8 *iptr);\n" |
| 34694 | "double16 __ovld modf(double16 x, double16 *iptr);\n" |
| 34695 | "#endif //cl_khr_fp64\n" |
| 34696 | "#ifdef cl_khr_fp16\n" |
| 34697 | "half __ovld modf(half x, half *iptr);\n" |
| 34698 | "half2 __ovld modf(half2 x, half2 *iptr);\n" |
| 34699 | "half3 __ovld modf(half3 x, half3 *iptr);\n" |
| 34700 | "half4 __ovld modf(half4 x, half4 *iptr);\n" |
| 34701 | "half8 __ovld modf(half8 x, half8 *iptr);\n" |
| 34702 | "half16 __ovld modf(half16 x, half16 *iptr);\n" |
| 34703 | "#endif //cl_khr_fp16\n" |
| 34704 | "#else\n" |
| 34705 | "float __ovld modf(float x, __global float *iptr);\n" |
| 34706 | "float2 __ovld modf(float2 x, __global float2 *iptr);\n" |
| 34707 | "float3 __ovld modf(float3 x, __global float3 *iptr);\n" |
| 34708 | "float4 __ovld modf(float4 x, __global float4 *iptr);\n" |
| 34709 | "float8 __ovld modf(float8 x, __global float8 *iptr);\n" |
| 34710 | "float16 __ovld modf(float16 x, __global float16 *iptr);\n" |
| 34711 | "float __ovld modf(float x, __local float *iptr);\n" |
| 34712 | "float2 __ovld modf(float2 x, __local float2 *iptr);\n" |
| 34713 | "float3 __ovld modf(float3 x, __local float3 *iptr);\n" |
| 34714 | "float4 __ovld modf(float4 x, __local float4 *iptr);\n" |
| 34715 | "float8 __ovld modf(float8 x, __local float8 *iptr);\n" |
| 34716 | "float16 __ovld modf(float16 x, __local float16 *iptr);\n" |
| 34717 | "float __ovld modf(float x, __private float *iptr);\n" |
| 34718 | "float2 __ovld modf(float2 x, __private float2 *iptr);\n" |
| 34719 | "float3 __ovld modf(float3 x, __private float3 *iptr);\n" |
| 34720 | "float4 __ovld modf(float4 x, __private float4 *iptr);\n" |
| 34721 | "float8 __ovld modf(float8 x, __private float8 *iptr);\n" |
| 34722 | "float16 __ovld modf(float16 x, __private float16 *iptr);\n" |
| 34723 | "#ifdef cl_khr_fp64\n" |
| 34724 | "double __ovld modf(double x, __global double *iptr);\n" |
| 34725 | "double2 __ovld modf(double2 x, __global double2 *iptr);\n" |
| 34726 | "double3 __ovld modf(double3 x, __global double3 *iptr);\n" |
| 34727 | "double4 __ovld modf(double4 x, __global double4 *iptr);\n" |
| 34728 | "double8 __ovld modf(double8 x, __global double8 *iptr);\n" |
| 34729 | "double16 __ovld modf(double16 x, __global double16 *iptr);\n" |
| 34730 | "double __ovld modf(double x, __local double *iptr);\n" |
| 34731 | "double2 __ovld modf(double2 x, __local double2 *iptr);\n" |
| 34732 | "double3 __ovld modf(double3 x, __local double3 *iptr);\n" |
| 34733 | "double4 __ovld modf(double4 x, __local double4 *iptr);\n" |
| 34734 | "double8 __ovld modf(double8 x, __local double8 *iptr);\n" |
| 34735 | "double16 __ovld modf(double16 x, __local double16 *iptr);\n" |
| 34736 | "double __ovld modf(double x, __private double *iptr);\n" |
| 34737 | "double2 __ovld modf(double2 x, __private double2 *iptr);\n" |
| 34738 | "double3 __ovld modf(double3 x, __private double3 *iptr);\n" |
| 34739 | "double4 __ovld modf(double4 x, __private double4 *iptr);\n" |
| 34740 | "double8 __ovld modf(double8 x, __private double8 *iptr);\n" |
| 34741 | "double16 __ovld modf(double16 x, __private double16 *iptr);\n" |
| 34742 | "#endif //cl_khr_fp64\n" |
| 34743 | "#ifdef cl_khr_fp16\n" |
| 34744 | "half __ovld modf(half x, __global half *iptr);\n" |
| 34745 | "half2 __ovld modf(half2 x, __global half2 *iptr);\n" |
| 34746 | "half3 __ovld modf(half3 x, __global half3 *iptr);\n" |
| 34747 | "half4 __ovld modf(half4 x, __global half4 *iptr);\n" |
| 34748 | "half8 __ovld modf(half8 x, __global half8 *iptr);\n" |
| 34749 | "half16 __ovld modf(half16 x, __global half16 *iptr);\n" |
| 34750 | "half __ovld modf(half x, __local half *iptr);\n" |
| 34751 | "half2 __ovld modf(half2 x, __local half2 *iptr);\n" |
| 34752 | "half3 __ovld modf(half3 x, __local half3 *iptr);\n" |
| 34753 | "half4 __ovld modf(half4 x, __local half4 *iptr);\n" |
| 34754 | "half8 __ovld modf(half8 x, __local half8 *iptr);\n" |
| 34755 | "half16 __ovld modf(half16 x, __local half16 *iptr);\n" |
| 34756 | "half __ovld modf(half x, __private half *iptr);\n" |
| 34757 | "half2 __ovld modf(half2 x, __private half2 *iptr);\n" |
| 34758 | "half3 __ovld modf(half3 x, __private half3 *iptr);\n" |
| 34759 | "half4 __ovld modf(half4 x, __private half4 *iptr);\n" |
| 34760 | "half8 __ovld modf(half8 x, __private half8 *iptr);\n" |
| 34761 | "half16 __ovld modf(half16 x, __private half16 *iptr);\n" |
| 34762 | "#endif //cl_khr_fp16\n" |
| 34763 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 34764 | "\n" |
| 34765 | "/**\n" |
| 34766 | " * Returns a quiet NaN. The nancode may be placed\n" |
| 34767 | " * in the significand of the resulting NaN.\n" |
| 34768 | " */\n" |
| 34769 | "float __ovld __cnfn nan(uint nancode);\n" |
| 34770 | "float2 __ovld __cnfn nan(uint2 nancode);\n" |
| 34771 | "float3 __ovld __cnfn nan(uint3 nancode);\n" |
| 34772 | "float4 __ovld __cnfn nan(uint4 nancode);\n" |
| 34773 | "float8 __ovld __cnfn nan(uint8 nancode);\n" |
| 34774 | "float16 __ovld __cnfn nan(uint16 nancode);\n" |
| 34775 | "#ifdef cl_khr_fp64\n" |
| 34776 | "double __ovld __cnfn nan(ulong nancode);\n" |
| 34777 | "double2 __ovld __cnfn nan(ulong2 nancode);\n" |
| 34778 | "double3 __ovld __cnfn nan(ulong3 nancode);\n" |
| 34779 | "double4 __ovld __cnfn nan(ulong4 nancode);\n" |
| 34780 | "double8 __ovld __cnfn nan(ulong8 nancode);\n" |
| 34781 | "double16 __ovld __cnfn nan(ulong16 nancode);\n" |
| 34782 | "#endif //cl_khr_fp64\n" |
| 34783 | "#ifdef cl_khr_fp16\n" |
| 34784 | "half __ovld __cnfn nan(ushort nancode);\n" |
| 34785 | "half2 __ovld __cnfn nan(ushort2 nancode);\n" |
| 34786 | "half3 __ovld __cnfn nan(ushort3 nancode);\n" |
| 34787 | "half4 __ovld __cnfn nan(ushort4 nancode);\n" |
| 34788 | "half8 __ovld __cnfn nan(ushort8 nancode);\n" |
| 34789 | "half16 __ovld __cnfn nan(ushort16 nancode);\n" |
| 34790 | "#endif //cl_khr_fp16\n" |
| 34791 | "\n" |
| 34792 | "/**\n" |
| 34793 | " * Computes the next representable single-precision\n" |
| 34794 | " * floating-point value following x in the direction of\n" |
| 34795 | " * y. Thus, if y is less than x, nextafter() returns the\n" |
| 34796 | " * largest representable floating-point number less\n" |
| 34797 | " * than x.\n" |
| 34798 | " */\n" |
| 34799 | "float __ovld __cnfn nextafter(float x, float y);\n" |
| 34800 | "float2 __ovld __cnfn nextafter(float2 x, float2 y);\n" |
| 34801 | "float3 __ovld __cnfn nextafter(float3 x, float3 y);\n" |
| 34802 | "float4 __ovld __cnfn nextafter(float4 x, float4 y);\n" |
| 34803 | "float8 __ovld __cnfn nextafter(float8 x, float8 y);\n" |
| 34804 | "float16 __ovld __cnfn nextafter(float16 x, float16 y);\n" |
| 34805 | "#ifdef cl_khr_fp64\n" |
| 34806 | "double __ovld __cnfn nextafter(double x, double y);\n" |
| 34807 | "double2 __ovld __cnfn nextafter(double2 x, double2 y);\n" |
| 34808 | "double3 __ovld __cnfn nextafter(double3 x, double3 y);\n" |
| 34809 | "double4 __ovld __cnfn nextafter(double4 x, double4 y);\n" |
| 34810 | "double8 __ovld __cnfn nextafter(double8 x, double8 y);\n" |
| 34811 | "double16 __ovld __cnfn nextafter(double16 x, double16 y);\n" |
| 34812 | "#endif //cl_khr_fp64\n" |
| 34813 | "#ifdef cl_khr_fp16\n" |
| 34814 | "half __ovld __cnfn nextafter(half x, half y);\n" |
| 34815 | "half2 __ovld __cnfn nextafter(half2 x, half2 y);\n" |
| 34816 | "half3 __ovld __cnfn nextafter(half3 x, half3 y);\n" |
| 34817 | "half4 __ovld __cnfn nextafter(half4 x, half4 y);\n" |
| 34818 | "half8 __ovld __cnfn nextafter(half8 x, half8 y);\n" |
| 34819 | "half16 __ovld __cnfn nextafter(half16 x, half16 y);\n" |
| 34820 | "#endif //cl_khr_fp16\n" |
| 34821 | "\n" |
| 34822 | "/**\n" |
| 34823 | " * Compute x to the power y.\n" |
| 34824 | " */\n" |
| 34825 | "float __ovld __cnfn pow(float x, float y);\n" |
| 34826 | "float2 __ovld __cnfn pow(float2 x, float2 y);\n" |
| 34827 | "float3 __ovld __cnfn pow(float3 x, float3 y);\n" |
| 34828 | "float4 __ovld __cnfn pow(float4 x, float4 y);\n" |
| 34829 | "float8 __ovld __cnfn pow(float8 x, float8 y);\n" |
| 34830 | "float16 __ovld __cnfn pow(float16 x, float16 y);\n" |
| 34831 | "#ifdef cl_khr_fp64\n" |
| 34832 | "double __ovld __cnfn pow(double x, double y);\n" |
| 34833 | "double2 __ovld __cnfn pow(double2 x, double2 y);\n" |
| 34834 | "double3 __ovld __cnfn pow(double3 x, double3 y);\n" |
| 34835 | "double4 __ovld __cnfn pow(double4 x, double4 y);\n" |
| 34836 | "double8 __ovld __cnfn pow(double8 x, double8 y);\n" |
| 34837 | "double16 __ovld __cnfn pow(double16 x, double16 y);\n" |
| 34838 | "#endif //cl_khr_fp64\n" |
| 34839 | "#ifdef cl_khr_fp16\n" |
| 34840 | "half __ovld __cnfn pow(half x, half y);\n" |
| 34841 | "half2 __ovld __cnfn pow(half2 x, half2 y);\n" |
| 34842 | "half3 __ovld __cnfn pow(half3 x, half3 y);\n" |
| 34843 | "half4 __ovld __cnfn pow(half4 x, half4 y);\n" |
| 34844 | "half8 __ovld __cnfn pow(half8 x, half8 y);\n" |
| 34845 | "half16 __ovld __cnfn pow(half16 x, half16 y);\n" |
| 34846 | "#endif //cl_khr_fp16\n" |
| 34847 | "\n" |
| 34848 | "/**\n" |
| 34849 | " * Compute x to the power y, where y is an integer.\n" |
| 34850 | " */\n" |
| 34851 | "float __ovld __cnfn pown(float x, int y);\n" |
| 34852 | "float2 __ovld __cnfn pown(float2 x, int2 y);\n" |
| 34853 | "float3 __ovld __cnfn pown(float3 x, int3 y);\n" |
| 34854 | "float4 __ovld __cnfn pown(float4 x, int4 y);\n" |
| 34855 | "float8 __ovld __cnfn pown(float8 x, int8 y);\n" |
| 34856 | "float16 __ovld __cnfn pown(float16 x, int16 y);\n" |
| 34857 | "#ifdef cl_khr_fp64\n" |
| 34858 | "double __ovld __cnfn pown(double x, int y);\n" |
| 34859 | "double2 __ovld __cnfn pown(double2 x, int2 y);\n" |
| 34860 | "double3 __ovld __cnfn pown(double3 x, int3 y);\n" |
| 34861 | "double4 __ovld __cnfn pown(double4 x, int4 y);\n" |
| 34862 | "double8 __ovld __cnfn pown(double8 x, int8 y);\n" |
| 34863 | "double16 __ovld __cnfn pown(double16 x, int16 y);\n" |
| 34864 | "#endif //cl_khr_fp64\n" |
| 34865 | "#ifdef cl_khr_fp16\n" |
| 34866 | "half __ovld __cnfn pown(half x, int y);\n" |
| 34867 | "half2 __ovld __cnfn pown(half2 x, int2 y);\n" |
| 34868 | "half3 __ovld __cnfn pown(half3 x, int3 y);\n" |
| 34869 | "half4 __ovld __cnfn pown(half4 x, int4 y);\n" |
| 34870 | "half8 __ovld __cnfn pown(half8 x, int8 y);\n" |
| 34871 | "half16 __ovld __cnfn pown(half16 x, int16 y);\n" |
| 34872 | "#endif //cl_khr_fp16\n" |
| 34873 | "\n" |
| 34874 | "/**\n" |
| 34875 | " * Compute x to the power y, where x is >= 0.\n" |
| 34876 | " */\n" |
| 34877 | "float __ovld __cnfn powr(float x, float y);\n" |
| 34878 | "float2 __ovld __cnfn powr(float2 x, float2 y);\n" |
| 34879 | "float3 __ovld __cnfn powr(float3 x, float3 y);\n" |
| 34880 | "float4 __ovld __cnfn powr(float4 x, float4 y);\n" |
| 34881 | "float8 __ovld __cnfn powr(float8 x, float8 y);\n" |
| 34882 | "float16 __ovld __cnfn powr(float16 x, float16 y);\n" |
| 34883 | "#ifdef cl_khr_fp64\n" |
| 34884 | "double __ovld __cnfn powr(double x, double y);\n" |
| 34885 | "double2 __ovld __cnfn powr(double2 x, double2 y);\n" |
| 34886 | "double3 __ovld __cnfn powr(double3 x, double3 y);\n" |
| 34887 | "double4 __ovld __cnfn powr(double4 x, double4 y);\n" |
| 34888 | "double8 __ovld __cnfn powr(double8 x, double8 y);\n" |
| 34889 | "double16 __ovld __cnfn powr(double16 x, double16 y);\n" |
| 34890 | "#endif //cl_khr_fp64\n" |
| 34891 | "#ifdef cl_khr_fp16\n" |
| 34892 | "half __ovld __cnfn powr(half x, half y);\n" |
| 34893 | "half2 __ovld __cnfn powr(half2 x, half2 y);\n" |
| 34894 | "half3 __ovld __cnfn powr(half3 x, half3 y);\n" |
| 34895 | "half4 __ovld __cnfn powr(half4 x, half4 y);\n" |
| 34896 | "half8 __ovld __cnfn powr(half8 x, half8 y);\n" |
| 34897 | "half16 __ovld __cnfn powr(half16 x, half16 y);\n" |
| 34898 | "#endif //cl_khr_fp16\n" |
| 34899 | "\n" |
| 34900 | "/**\n" |
| 34901 | " * Compute the value r such that r = x - n*y, where n\n" |
| 34902 | " * is the integer nearest the exact value of x/y. If there\n" |
| 34903 | " * are two integers closest to x/y, n shall be the even\n" |
| 34904 | " * one. If r is zero, it is given the same sign as x.\n" |
| 34905 | " */\n" |
| 34906 | "float __ovld __cnfn remainder(float x, float y);\n" |
| 34907 | "float2 __ovld __cnfn remainder(float2 x, float2 y);\n" |
| 34908 | "float3 __ovld __cnfn remainder(float3 x, float3 y);\n" |
| 34909 | "float4 __ovld __cnfn remainder(float4 x, float4 y);\n" |
| 34910 | "float8 __ovld __cnfn remainder(float8 x, float8 y);\n" |
| 34911 | "float16 __ovld __cnfn remainder(float16 x, float16 y);\n" |
| 34912 | "#ifdef cl_khr_fp64\n" |
| 34913 | "double __ovld __cnfn remainder(double x, double y);\n" |
| 34914 | "double2 __ovld __cnfn remainder(double2 x, double2 y);\n" |
| 34915 | "double3 __ovld __cnfn remainder(double3 x, double3 y);\n" |
| 34916 | "double4 __ovld __cnfn remainder(double4 x, double4 y);\n" |
| 34917 | "double8 __ovld __cnfn remainder(double8 x, double8 y);\n" |
| 34918 | "double16 __ovld __cnfn remainder(double16 x, double16 y);\n" |
| 34919 | "#endif //cl_khr_fp64\n" |
| 34920 | "#ifdef cl_khr_fp16\n" |
| 34921 | "half __ovld __cnfn remainder(half x, half y);\n" |
| 34922 | "half2 __ovld __cnfn remainder(half2 x, half2 y);\n" |
| 34923 | "half3 __ovld __cnfn remainder(half3 x, half3 y);\n" |
| 34924 | "half4 __ovld __cnfn remainder(half4 x, half4 y);\n" |
| 34925 | "half8 __ovld __cnfn remainder(half8 x, half8 y);\n" |
| 34926 | "half16 __ovld __cnfn remainder(half16 x, half16 y);\n" |
| 34927 | "#endif //cl_khr_fp16\n" |
| 34928 | "\n" |
| 34929 | "/**\n" |
| 34930 | " * The remquo function computes the value r such\n" |
| 34931 | " * that r = x - n*y, where n is the integer nearest the\n" |
| 34932 | " * exact value of x/y. If there are two integers closest\n" |
| 34933 | " * to x/y, n shall be the even one. If r is zero, it is\n" |
| 34934 | " * given the same sign as x. This is the same value\n" |
| 34935 | " * that is returned by the remainder function.\n" |
| 34936 | " * remquo also calculates the lower seven bits of the\n" |
| 34937 | " * integral quotient x/y, and gives that value the same\n" |
| 34938 | " * sign as x/y. It stores this signed value in the object\n" |
| 34939 | " * pointed to by quo.\n" |
| 34940 | " */\n" |
| 34941 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 34942 | "float __ovld remquo(float x, float y, int *quo);\n" |
| 34943 | "float2 __ovld remquo(float2 x, float2 y, int2 *quo);\n" |
| 34944 | "float3 __ovld remquo(float3 x, float3 y, int3 *quo);\n" |
| 34945 | "float4 __ovld remquo(float4 x, float4 y, int4 *quo);\n" |
| 34946 | "float8 __ovld remquo(float8 x, float8 y, int8 *quo);\n" |
| 34947 | "float16 __ovld remquo(float16 x, float16 y, int16 *quo);\n" |
| 34948 | "#ifdef cl_khr_fp64\n" |
| 34949 | "double __ovld remquo(double x, double y, int *quo);\n" |
| 34950 | "double2 __ovld remquo(double2 x, double2 y, int2 *quo);\n" |
| 34951 | "double3 __ovld remquo(double3 x, double3 y, int3 *quo);\n" |
| 34952 | "double4 __ovld remquo(double4 x, double4 y, int4 *quo);\n" |
| 34953 | "double8 __ovld remquo(double8 x, double8 y, int8 *quo);\n" |
| 34954 | "double16 __ovld remquo(double16 x, double16 y, int16 *quo);\n" |
| 34955 | "#endif //cl_khr_fp64\n" |
| 34956 | "#ifdef cl_khr_fp16\n" |
| 34957 | "half __ovld remquo(half x, half y, int *quo);\n" |
| 34958 | "half2 __ovld remquo(half2 x, half2 y, int2 *quo);\n" |
| 34959 | "half3 __ovld remquo(half3 x, half3 y, int3 *quo);\n" |
| 34960 | "half4 __ovld remquo(half4 x, half4 y, int4 *quo);\n" |
| 34961 | "half8 __ovld remquo(half8 x, half8 y, int8 *quo);\n" |
| 34962 | "half16 __ovld remquo(half16 x, half16 y, int16 *quo);\n" |
| 34963 | "\n" |
| 34964 | "#endif //cl_khr_fp16\n" |
| 34965 | "#else\n" |
| 34966 | "float __ovld remquo(float x, float y, __global int *quo);\n" |
| 34967 | "float2 __ovld remquo(float2 x, float2 y, __global int2 *quo);\n" |
| 34968 | "float3 __ovld remquo(float3 x, float3 y, __global int3 *quo);\n" |
| 34969 | "float4 __ovld remquo(float4 x, float4 y, __global int4 *quo);\n" |
| 34970 | "float8 __ovld remquo(float8 x, float8 y, __global int8 *quo);\n" |
| 34971 | "float16 __ovld remquo(float16 x, float16 y, __global int16 *quo);\n" |
| 34972 | "float __ovld remquo(float x, float y, __local int *quo);\n" |
| 34973 | "float2 __ovld remquo(float2 x, float2 y, __local int2 *quo);\n" |
| 34974 | "float3 __ovld remquo(float3 x, float3 y, __local int3 *quo);\n" |
| 34975 | "float4 __ovld remquo(float4 x, float4 y, __local int4 *quo);\n" |
| 34976 | "float8 __ovld remquo(float8 x, float8 y, __local int8 *quo);\n" |
| 34977 | "float16 __ovld remquo(float16 x, float16 y, __local int16 *quo);\n" |
| 34978 | "float __ovld remquo(float x, float y, __private int *quo);\n" |
| 34979 | "float2 __ovld remquo(float2 x, float2 y, __private int2 *quo);\n" |
| 34980 | "float3 __ovld remquo(float3 x, float3 y, __private int3 *quo);\n" |
| 34981 | "float4 __ovld remquo(float4 x, float4 y, __private int4 *quo);\n" |
| 34982 | "float8 __ovld remquo(float8 x, float8 y, __private int8 *quo);\n" |
| 34983 | "float16 __ovld remquo(float16 x, float16 y, __private int16 *quo);\n" |
| 34984 | "#ifdef cl_khr_fp64\n" |
| 34985 | "double __ovld remquo(double x, double y, __global int *quo);\n" |
| 34986 | "double2 __ovld remquo(double2 x, double2 y, __global int2 *quo);\n" |
| 34987 | "double3 __ovld remquo(double3 x, double3 y, __global int3 *quo);\n" |
| 34988 | "double4 __ovld remquo(double4 x, double4 y, __global int4 *quo);\n" |
| 34989 | "double8 __ovld remquo(double8 x, double8 y, __global int8 *quo);\n" |
| 34990 | "double16 __ovld remquo(double16 x, double16 y, __global int16 *quo);\n" |
| 34991 | "double __ovld remquo(double x, double y, __local int *quo);\n" |
| 34992 | "double2 __ovld remquo(double2 x, double2 y, __local int2 *quo);\n" |
| 34993 | "double3 __ovld remquo(double3 x, double3 y, __local int3 *quo);\n" |
| 34994 | "double4 __ovld remquo(double4 x, double4 y, __local int4 *quo);\n" |
| 34995 | "double8 __ovld remquo(double8 x, double8 y, __local int8 *quo);\n" |
| 34996 | "double16 __ovld remquo(double16 x, double16 y, __local int16 *quo);\n" |
| 34997 | "double __ovld remquo(double x, double y, __private int *quo);\n" |
| 34998 | "double2 __ovld remquo(double2 x, double2 y, __private int2 *quo);\n" |
| 34999 | "double3 __ovld remquo(double3 x, double3 y, __private int3 *quo);\n" |
| 35000 | "double4 __ovld remquo(double4 x, double4 y, __private int4 *quo);\n" |
| 35001 | "double8 __ovld remquo(double8 x, double8 y, __private int8 *quo);\n" |
| 35002 | "double16 __ovld remquo(double16 x, double16 y, __private int16 *quo);\n" |
| 35003 | "#endif //cl_khr_fp64\n" |
| 35004 | "#ifdef cl_khr_fp16\n" |
| 35005 | "half __ovld remquo(half x, half y, __global int *quo);\n" |
| 35006 | "half2 __ovld remquo(half2 x, half2 y, __global int2 *quo);\n" |
| 35007 | "half3 __ovld remquo(half3 x, half3 y, __global int3 *quo);\n" |
| 35008 | "half4 __ovld remquo(half4 x, half4 y, __global int4 *quo);\n" |
| 35009 | "half8 __ovld remquo(half8 x, half8 y, __global int8 *quo);\n" |
| 35010 | "half16 __ovld remquo(half16 x, half16 y, __global int16 *quo);\n" |
| 35011 | "half __ovld remquo(half x, half y, __local int *quo);\n" |
| 35012 | "half2 __ovld remquo(half2 x, half2 y, __local int2 *quo);\n" |
| 35013 | "half3 __ovld remquo(half3 x, half3 y, __local int3 *quo);\n" |
| 35014 | "half4 __ovld remquo(half4 x, half4 y, __local int4 *quo);\n" |
| 35015 | "half8 __ovld remquo(half8 x, half8 y, __local int8 *quo);\n" |
| 35016 | "half16 __ovld remquo(half16 x, half16 y, __local int16 *quo);\n" |
| 35017 | "half __ovld remquo(half x, half y, __private int *quo);\n" |
| 35018 | "half2 __ovld remquo(half2 x, half2 y, __private int2 *quo);\n" |
| 35019 | "half3 __ovld remquo(half3 x, half3 y, __private int3 *quo);\n" |
| 35020 | "half4 __ovld remquo(half4 x, half4 y, __private int4 *quo);\n" |
| 35021 | "half8 __ovld remquo(half8 x, half8 y, __private int8 *quo);\n" |
| 35022 | "half16 __ovld remquo(half16 x, half16 y, __private int16 *quo);\n" |
| 35023 | "#endif //cl_khr_fp16\n" |
| 35024 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 35025 | "/**\n" |
| 35026 | " * Round to integral value (using round to nearest\n" |
| 35027 | " * even rounding mode) in floating-point format.\n" |
| 35028 | " * Refer to section 7.1 for description of rounding\n" |
| 35029 | " * modes.\n" |
| 35030 | " */\n" |
| 35031 | "float __ovld __cnfn rint(float);\n" |
| 35032 | "float2 __ovld __cnfn rint(float2);\n" |
| 35033 | "float3 __ovld __cnfn rint(float3);\n" |
| 35034 | "float4 __ovld __cnfn rint(float4);\n" |
| 35035 | "float8 __ovld __cnfn rint(float8);\n" |
| 35036 | "float16 __ovld __cnfn rint(float16);\n" |
| 35037 | "#ifdef cl_khr_fp64\n" |
| 35038 | "double __ovld __cnfn rint(double);\n" |
| 35039 | "double2 __ovld __cnfn rint(double2);\n" |
| 35040 | "double3 __ovld __cnfn rint(double3);\n" |
| 35041 | "double4 __ovld __cnfn rint(double4);\n" |
| 35042 | "double8 __ovld __cnfn rint(double8);\n" |
| 35043 | "double16 __ovld __cnfn rint(double16);\n" |
| 35044 | "#endif //cl_khr_fp64\n" |
| 35045 | "#ifdef cl_khr_fp16\n" |
| 35046 | "half __ovld __cnfn rint(half);\n" |
| 35047 | "half2 __ovld __cnfn rint(half2);\n" |
| 35048 | "half3 __ovld __cnfn rint(half3);\n" |
| 35049 | "half4 __ovld __cnfn rint(half4);\n" |
| 35050 | "half8 __ovld __cnfn rint(half8);\n" |
| 35051 | "half16 __ovld __cnfn rint(half16);\n" |
| 35052 | "#endif //cl_khr_fp16\n" |
| 35053 | "\n" |
| 35054 | "/**\n" |
| 35055 | " * Compute x to the power 1/y.\n" |
| 35056 | " */\n" |
| 35057 | "float __ovld __cnfn rootn(float x, int y);\n" |
| 35058 | "float2 __ovld __cnfn rootn(float2 x, int2 y);\n" |
| 35059 | "float3 __ovld __cnfn rootn(float3 x, int3 y);\n" |
| 35060 | "float4 __ovld __cnfn rootn(float4 x, int4 y);\n" |
| 35061 | "float8 __ovld __cnfn rootn(float8 x, int8 y);\n" |
| 35062 | "float16 __ovld __cnfn rootn(float16 x, int16 y);\n" |
| 35063 | "#ifdef cl_khr_fp64\n" |
| 35064 | "double __ovld __cnfn rootn(double x, int y);\n" |
| 35065 | "double2 __ovld __cnfn rootn(double2 x, int2 y);\n" |
| 35066 | "double3 __ovld __cnfn rootn(double3 x, int3 y);\n" |
| 35067 | "double4 __ovld __cnfn rootn(double4 x, int4 y);\n" |
| 35068 | "double8 __ovld __cnfn rootn(double8 x, int8 y);\n" |
| 35069 | "double16 __ovld __cnfn rootn(double16 x, int16 y);\n" |
| 35070 | "#endif //cl_khr_fp64\n" |
| 35071 | "#ifdef cl_khr_fp16\n" |
| 35072 | "half __ovld __cnfn rootn(half x, int y);\n" |
| 35073 | "half2 __ovld __cnfn rootn(half2 x, int2 y);\n" |
| 35074 | "half3 __ovld __cnfn rootn(half3 x, int3 y);\n" |
| 35075 | "half4 __ovld __cnfn rootn(half4 x, int4 y);\n" |
| 35076 | "half8 __ovld __cnfn rootn(half8 x, int8 y);\n" |
| 35077 | "half16 __ovld __cnfn rootn(half16 x, int16 y);\n" |
| 35078 | "#endif //cl_khr_fp16\n" |
| 35079 | "\n" |
| 35080 | "/**\n" |
| 35081 | " * Return the integral value nearest to x rounding\n" |
| 35082 | " * halfway cases away from zero, regardless of the\n" |
| 35083 | " * current rounding direction.\n" |
| 35084 | " */\n" |
| 35085 | "float __ovld __cnfn round(float x);\n" |
| 35086 | "float2 __ovld __cnfn round(float2 x);\n" |
| 35087 | "float3 __ovld __cnfn round(float3 x);\n" |
| 35088 | "float4 __ovld __cnfn round(float4 x);\n" |
| 35089 | "float8 __ovld __cnfn round(float8 x);\n" |
| 35090 | "float16 __ovld __cnfn round(float16 x);\n" |
| 35091 | "#ifdef cl_khr_fp64\n" |
| 35092 | "double __ovld __cnfn round(double x);\n" |
| 35093 | "double2 __ovld __cnfn round(double2 x);\n" |
| 35094 | "double3 __ovld __cnfn round(double3 x);\n" |
| 35095 | "double4 __ovld __cnfn round(double4 x);\n" |
| 35096 | "double8 __ovld __cnfn round(double8 x);\n" |
| 35097 | "double16 __ovld __cnfn round(double16 x);\n" |
| 35098 | "#endif //cl_khr_fp64\n" |
| 35099 | "#ifdef cl_khr_fp16\n" |
| 35100 | "half __ovld __cnfn round(half x);\n" |
| 35101 | "half2 __ovld __cnfn round(half2 x);\n" |
| 35102 | "half3 __ovld __cnfn round(half3 x);\n" |
| 35103 | "half4 __ovld __cnfn round(half4 x);\n" |
| 35104 | "half8 __ovld __cnfn round(half8 x);\n" |
| 35105 | "half16 __ovld __cnfn round(half16 x);\n" |
| 35106 | "#endif //cl_khr_fp16\n" |
| 35107 | "\n" |
| 35108 | "/**\n" |
| 35109 | " * Compute inverse square root.\n" |
| 35110 | " */\n" |
| 35111 | "float __ovld __cnfn rsqrt(float);\n" |
| 35112 | "float2 __ovld __cnfn rsqrt(float2);\n" |
| 35113 | "float3 __ovld __cnfn rsqrt(float3);\n" |
| 35114 | "float4 __ovld __cnfn rsqrt(float4);\n" |
| 35115 | "float8 __ovld __cnfn rsqrt(float8);\n" |
| 35116 | "float16 __ovld __cnfn rsqrt(float16);\n" |
| 35117 | "#ifdef cl_khr_fp64\n" |
| 35118 | "double __ovld __cnfn rsqrt(double);\n" |
| 35119 | "double2 __ovld __cnfn rsqrt(double2);\n" |
| 35120 | "double3 __ovld __cnfn rsqrt(double3);\n" |
| 35121 | "double4 __ovld __cnfn rsqrt(double4);\n" |
| 35122 | "double8 __ovld __cnfn rsqrt(double8);\n" |
| 35123 | "double16 __ovld __cnfn rsqrt(double16);\n" |
| 35124 | "#endif //cl_khr_fp64\n" |
| 35125 | "#ifdef cl_khr_fp16\n" |
| 35126 | "half __ovld __cnfn rsqrt(half);\n" |
| 35127 | "half2 __ovld __cnfn rsqrt(half2);\n" |
| 35128 | "half3 __ovld __cnfn rsqrt(half3);\n" |
| 35129 | "half4 __ovld __cnfn rsqrt(half4);\n" |
| 35130 | "half8 __ovld __cnfn rsqrt(half8);\n" |
| 35131 | "half16 __ovld __cnfn rsqrt(half16);\n" |
| 35132 | "#endif //cl_khr_fp16\n" |
| 35133 | "\n" |
| 35134 | "/**\n" |
| 35135 | " * Compute sine.\n" |
| 35136 | " */\n" |
| 35137 | "float __ovld __cnfn sin(float);\n" |
| 35138 | "float2 __ovld __cnfn sin(float2);\n" |
| 35139 | "float3 __ovld __cnfn sin(float3);\n" |
| 35140 | "float4 __ovld __cnfn sin(float4);\n" |
| 35141 | "float8 __ovld __cnfn sin(float8);\n" |
| 35142 | "float16 __ovld __cnfn sin(float16);\n" |
| 35143 | "#ifdef cl_khr_fp64\n" |
| 35144 | "double __ovld __cnfn sin(double);\n" |
| 35145 | "double2 __ovld __cnfn sin(double2);\n" |
| 35146 | "double3 __ovld __cnfn sin(double3);\n" |
| 35147 | "double4 __ovld __cnfn sin(double4);\n" |
| 35148 | "double8 __ovld __cnfn sin(double8);\n" |
| 35149 | "double16 __ovld __cnfn sin(double16);\n" |
| 35150 | "#endif //cl_khr_fp64\n" |
| 35151 | "#ifdef cl_khr_fp16\n" |
| 35152 | "half __ovld __cnfn sin(half);\n" |
| 35153 | "half2 __ovld __cnfn sin(half2);\n" |
| 35154 | "half3 __ovld __cnfn sin(half3);\n" |
| 35155 | "half4 __ovld __cnfn sin(half4);\n" |
| 35156 | "half8 __ovld __cnfn sin(half8);\n" |
| 35157 | "half16 __ovld __cnfn sin(half16);\n" |
| 35158 | "#endif //cl_khr_fp16\n" |
| 35159 | "\n" |
| 35160 | "/**\n" |
| 35161 | " * Compute sine and cosine of x. The computed sine\n" |
| 35162 | " * is the return value and computed cosine is returned\n" |
| 35163 | " * in cosval.\n" |
| 35164 | " */\n" |
| 35165 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 35166 | "float __ovld sincos(float x, float *cosval);\n" |
| 35167 | "float2 __ovld sincos(float2 x, float2 *cosval);\n" |
| 35168 | "float3 __ovld sincos(float3 x, float3 *cosval);\n" |
| 35169 | "float4 __ovld sincos(float4 x, float4 *cosval);\n" |
| 35170 | "float8 __ovld sincos(float8 x, float8 *cosval);\n" |
| 35171 | "float16 __ovld sincos(float16 x, float16 *cosval);\n" |
| 35172 | "#ifdef cl_khr_fp64\n" |
| 35173 | "double __ovld sincos(double x, double *cosval);\n" |
| 35174 | "double2 __ovld sincos(double2 x, double2 *cosval);\n" |
| 35175 | "double3 __ovld sincos(double3 x, double3 *cosval);\n" |
| 35176 | "double4 __ovld sincos(double4 x, double4 *cosval);\n" |
| 35177 | "double8 __ovld sincos(double8 x, double8 *cosval);\n" |
| 35178 | "double16 __ovld sincos(double16 x, double16 *cosval);\n" |
| 35179 | "#endif //cl_khr_fp64\n" |
| 35180 | "#ifdef cl_khr_fp16\n" |
| 35181 | "half __ovld sincos(half x, half *cosval);\n" |
| 35182 | "half2 __ovld sincos(half2 x, half2 *cosval);\n" |
| 35183 | "half3 __ovld sincos(half3 x, half3 *cosval);\n" |
| 35184 | "half4 __ovld sincos(half4 x, half4 *cosval);\n" |
| 35185 | "half8 __ovld sincos(half8 x, half8 *cosval);\n" |
| 35186 | "half16 __ovld sincos(half16 x, half16 *cosval);\n" |
| 35187 | "#endif //cl_khr_fp16\n" |
| 35188 | "#else\n" |
| 35189 | "float __ovld sincos(float x, __global float *cosval);\n" |
| 35190 | "float2 __ovld sincos(float2 x, __global float2 *cosval);\n" |
| 35191 | "float3 __ovld sincos(float3 x, __global float3 *cosval);\n" |
| 35192 | "float4 __ovld sincos(float4 x, __global float4 *cosval);\n" |
| 35193 | "float8 __ovld sincos(float8 x, __global float8 *cosval);\n" |
| 35194 | "float16 __ovld sincos(float16 x, __global float16 *cosval);\n" |
| 35195 | "float __ovld sincos(float x, __local float *cosval);\n" |
| 35196 | "float2 __ovld sincos(float2 x, __local float2 *cosval);\n" |
| 35197 | "float3 __ovld sincos(float3 x, __local float3 *cosval);\n" |
| 35198 | "float4 __ovld sincos(float4 x, __local float4 *cosval);\n" |
| 35199 | "float8 __ovld sincos(float8 x, __local float8 *cosval);\n" |
| 35200 | "float16 __ovld sincos(float16 x, __local float16 *cosval);\n" |
| 35201 | "float __ovld sincos(float x, __private float *cosval);\n" |
| 35202 | "float2 __ovld sincos(float2 x, __private float2 *cosval);\n" |
| 35203 | "float3 __ovld sincos(float3 x, __private float3 *cosval);\n" |
| 35204 | "float4 __ovld sincos(float4 x, __private float4 *cosval);\n" |
| 35205 | "float8 __ovld sincos(float8 x, __private float8 *cosval);\n" |
| 35206 | "float16 __ovld sincos(float16 x, __private float16 *cosval);\n" |
| 35207 | "#ifdef cl_khr_fp64\n" |
| 35208 | "double __ovld sincos(double x, __global double *cosval);\n" |
| 35209 | "double2 __ovld sincos(double2 x, __global double2 *cosval);\n" |
| 35210 | "double3 __ovld sincos(double3 x, __global double3 *cosval);\n" |
| 35211 | "double4 __ovld sincos(double4 x, __global double4 *cosval);\n" |
| 35212 | "double8 __ovld sincos(double8 x, __global double8 *cosval);\n" |
| 35213 | "double16 __ovld sincos(double16 x, __global double16 *cosval);\n" |
| 35214 | "double __ovld sincos(double x, __local double *cosval);\n" |
| 35215 | "double2 __ovld sincos(double2 x, __local double2 *cosval);\n" |
| 35216 | "double3 __ovld sincos(double3 x, __local double3 *cosval);\n" |
| 35217 | "double4 __ovld sincos(double4 x, __local double4 *cosval);\n" |
| 35218 | "double8 __ovld sincos(double8 x, __local double8 *cosval);\n" |
| 35219 | "double16 __ovld sincos(double16 x, __local double16 *cosval);\n" |
| 35220 | "double __ovld sincos(double x, __private double *cosval);\n" |
| 35221 | "double2 __ovld sincos(double2 x, __private double2 *cosval);\n" |
| 35222 | "double3 __ovld sincos(double3 x, __private double3 *cosval);\n" |
| 35223 | "double4 __ovld sincos(double4 x, __private double4 *cosval);\n" |
| 35224 | "double8 __ovld sincos(double8 x, __private double8 *cosval);\n" |
| 35225 | "double16 __ovld sincos(double16 x, __private double16 *cosval);\n" |
| 35226 | "#endif //cl_khr_fp64\n" |
| 35227 | "#ifdef cl_khr_fp16\n" |
| 35228 | "half __ovld sincos(half x, __global half *cosval);\n" |
| 35229 | "half2 __ovld sincos(half2 x, __global half2 *cosval);\n" |
| 35230 | "half3 __ovld sincos(half3 x, __global half3 *cosval);\n" |
| 35231 | "half4 __ovld sincos(half4 x, __global half4 *cosval);\n" |
| 35232 | "half8 __ovld sincos(half8 x, __global half8 *cosval);\n" |
| 35233 | "half16 __ovld sincos(half16 x, __global half16 *cosval);\n" |
| 35234 | "half __ovld sincos(half x, __local half *cosval);\n" |
| 35235 | "half2 __ovld sincos(half2 x, __local half2 *cosval);\n" |
| 35236 | "half3 __ovld sincos(half3 x, __local half3 *cosval);\n" |
| 35237 | "half4 __ovld sincos(half4 x, __local half4 *cosval);\n" |
| 35238 | "half8 __ovld sincos(half8 x, __local half8 *cosval);\n" |
| 35239 | "half16 __ovld sincos(half16 x, __local half16 *cosval);\n" |
| 35240 | "half __ovld sincos(half x, __private half *cosval);\n" |
| 35241 | "half2 __ovld sincos(half2 x, __private half2 *cosval);\n" |
| 35242 | "half3 __ovld sincos(half3 x, __private half3 *cosval);\n" |
| 35243 | "half4 __ovld sincos(half4 x, __private half4 *cosval);\n" |
| 35244 | "half8 __ovld sincos(half8 x, __private half8 *cosval);\n" |
| 35245 | "half16 __ovld sincos(half16 x, __private half16 *cosval);\n" |
| 35246 | "#endif //cl_khr_fp16\n" |
| 35247 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 35248 | "\n" |
| 35249 | "/**\n" |
| 35250 | " * Compute hyperbolic sine.\n" |
| 35251 | " */\n" |
| 35252 | "float __ovld __cnfn sinh(float);\n" |
| 35253 | "float2 __ovld __cnfn sinh(float2);\n" |
| 35254 | "float3 __ovld __cnfn sinh(float3);\n" |
| 35255 | "float4 __ovld __cnfn sinh(float4);\n" |
| 35256 | "float8 __ovld __cnfn sinh(float8);\n" |
| 35257 | "float16 __ovld __cnfn sinh(float16);\n" |
| 35258 | "#ifdef cl_khr_fp64\n" |
| 35259 | "double __ovld __cnfn sinh(double);\n" |
| 35260 | "double2 __ovld __cnfn sinh(double2);\n" |
| 35261 | "double3 __ovld __cnfn sinh(double3);\n" |
| 35262 | "double4 __ovld __cnfn sinh(double4);\n" |
| 35263 | "double8 __ovld __cnfn sinh(double8);\n" |
| 35264 | "double16 __ovld __cnfn sinh(double16);\n" |
| 35265 | "#endif //cl_khr_fp64\n" |
| 35266 | "#ifdef cl_khr_fp16\n" |
| 35267 | "half __ovld __cnfn sinh(half);\n" |
| 35268 | "half2 __ovld __cnfn sinh(half2);\n" |
| 35269 | "half3 __ovld __cnfn sinh(half3);\n" |
| 35270 | "half4 __ovld __cnfn sinh(half4);\n" |
| 35271 | "half8 __ovld __cnfn sinh(half8);\n" |
| 35272 | "half16 __ovld __cnfn sinh(half16);\n" |
| 35273 | "#endif //cl_khr_fp16\n" |
| 35274 | "\n" |
| 35275 | "/**\n" |
| 35276 | " * Compute sin (PI * x).\n" |
| 35277 | " */\n" |
| 35278 | "float __ovld __cnfn sinpi(float x);\n" |
| 35279 | "float2 __ovld __cnfn sinpi(float2 x);\n" |
| 35280 | "float3 __ovld __cnfn sinpi(float3 x);\n" |
| 35281 | "float4 __ovld __cnfn sinpi(float4 x);\n" |
| 35282 | "float8 __ovld __cnfn sinpi(float8 x);\n" |
| 35283 | "float16 __ovld __cnfn sinpi(float16 x);\n" |
| 35284 | "#ifdef cl_khr_fp64\n" |
| 35285 | "double __ovld __cnfn sinpi(double x);\n" |
| 35286 | "double2 __ovld __cnfn sinpi(double2 x);\n" |
| 35287 | "double3 __ovld __cnfn sinpi(double3 x);\n" |
| 35288 | "double4 __ovld __cnfn sinpi(double4 x);\n" |
| 35289 | "double8 __ovld __cnfn sinpi(double8 x);\n" |
| 35290 | "double16 __ovld __cnfn sinpi(double16 x);\n" |
| 35291 | "#endif //cl_khr_fp64\n" |
| 35292 | "#ifdef cl_khr_fp16\n" |
| 35293 | "half __ovld __cnfn sinpi(half x);\n" |
| 35294 | "half2 __ovld __cnfn sinpi(half2 x);\n" |
| 35295 | "half3 __ovld __cnfn sinpi(half3 x);\n" |
| 35296 | "half4 __ovld __cnfn sinpi(half4 x);\n" |
| 35297 | "half8 __ovld __cnfn sinpi(half8 x);\n" |
| 35298 | "half16 __ovld __cnfn sinpi(half16 x);\n" |
| 35299 | "#endif //cl_khr_fp16\n" |
| 35300 | "\n" |
| 35301 | "/**\n" |
| 35302 | " * Compute square root.\n" |
| 35303 | " */\n" |
| 35304 | "float __ovld __cnfn sqrt(float);\n" |
| 35305 | "float2 __ovld __cnfn sqrt(float2);\n" |
| 35306 | "float3 __ovld __cnfn sqrt(float3);\n" |
| 35307 | "float4 __ovld __cnfn sqrt(float4);\n" |
| 35308 | "float8 __ovld __cnfn sqrt(float8);\n" |
| 35309 | "float16 __ovld __cnfn sqrt(float16);\n" |
| 35310 | "#ifdef cl_khr_fp64\n" |
| 35311 | "double __ovld __cnfn sqrt(double);\n" |
| 35312 | "double2 __ovld __cnfn sqrt(double2);\n" |
| 35313 | "double3 __ovld __cnfn sqrt(double3);\n" |
| 35314 | "double4 __ovld __cnfn sqrt(double4);\n" |
| 35315 | "double8 __ovld __cnfn sqrt(double8);\n" |
| 35316 | "double16 __ovld __cnfn sqrt(double16);\n" |
| 35317 | "#endif //cl_khr_fp64\n" |
| 35318 | "#ifdef cl_khr_fp16\n" |
| 35319 | "half __ovld __cnfn sqrt(half);\n" |
| 35320 | "half2 __ovld __cnfn sqrt(half2);\n" |
| 35321 | "half3 __ovld __cnfn sqrt(half3);\n" |
| 35322 | "half4 __ovld __cnfn sqrt(half4);\n" |
| 35323 | "half8 __ovld __cnfn sqrt(half8);\n" |
| 35324 | "half16 __ovld __cnfn sqrt(half16);\n" |
| 35325 | "#endif //cl_khr_fp16\n" |
| 35326 | "\n" |
| 35327 | "/**\n" |
| 35328 | " * Compute tangent.\n" |
| 35329 | " */\n" |
| 35330 | "float __ovld __cnfn tan(float);\n" |
| 35331 | "float2 __ovld __cnfn tan(float2);\n" |
| 35332 | "float3 __ovld __cnfn tan(float3);\n" |
| 35333 | "float4 __ovld __cnfn tan(float4);\n" |
| 35334 | "float8 __ovld __cnfn tan(float8);\n" |
| 35335 | "float16 __ovld __cnfn tan(float16);\n" |
| 35336 | "#ifdef cl_khr_fp64\n" |
| 35337 | "double __ovld __cnfn tan(double);\n" |
| 35338 | "double2 __ovld __cnfn tan(double2);\n" |
| 35339 | "double3 __ovld __cnfn tan(double3);\n" |
| 35340 | "double4 __ovld __cnfn tan(double4);\n" |
| 35341 | "double8 __ovld __cnfn tan(double8);\n" |
| 35342 | "double16 __ovld __cnfn tan(double16);\n" |
| 35343 | "#endif //cl_khr_fp64\n" |
| 35344 | "#ifdef cl_khr_fp16\n" |
| 35345 | "half __ovld __cnfn tan(half);\n" |
| 35346 | "half2 __ovld __cnfn tan(half2);\n" |
| 35347 | "half3 __ovld __cnfn tan(half3);\n" |
| 35348 | "half4 __ovld __cnfn tan(half4);\n" |
| 35349 | "half8 __ovld __cnfn tan(half8);\n" |
| 35350 | "half16 __ovld __cnfn tan(half16);\n" |
| 35351 | "#endif //cl_khr_fp16\n" |
| 35352 | "\n" |
| 35353 | "/**\n" |
| 35354 | " * Compute hyperbolic tangent.\n" |
| 35355 | " */\n" |
| 35356 | "float __ovld __cnfn tanh(float);\n" |
| 35357 | "float2 __ovld __cnfn tanh(float2);\n" |
| 35358 | "float3 __ovld __cnfn tanh(float3);\n" |
| 35359 | "float4 __ovld __cnfn tanh(float4);\n" |
| 35360 | "float8 __ovld __cnfn tanh(float8);\n" |
| 35361 | "float16 __ovld __cnfn tanh(float16);\n" |
| 35362 | "#ifdef cl_khr_fp64\n" |
| 35363 | "double __ovld __cnfn tanh(double);\n" |
| 35364 | "double2 __ovld __cnfn tanh(double2);\n" |
| 35365 | "double3 __ovld __cnfn tanh(double3);\n" |
| 35366 | "double4 __ovld __cnfn tanh(double4);\n" |
| 35367 | "double8 __ovld __cnfn tanh(double8);\n" |
| 35368 | "double16 __ovld __cnfn tanh(double16);\n" |
| 35369 | "#endif //cl_khr_fp64\n" |
| 35370 | "#ifdef cl_khr_fp16\n" |
| 35371 | "half __ovld __cnfn tanh(half);\n" |
| 35372 | "half2 __ovld __cnfn tanh(half2);\n" |
| 35373 | "half3 __ovld __cnfn tanh(half3);\n" |
| 35374 | "half4 __ovld __cnfn tanh(half4);\n" |
| 35375 | "half8 __ovld __cnfn tanh(half8);\n" |
| 35376 | "half16 __ovld __cnfn tanh(half16);\n" |
| 35377 | "#endif //cl_khr_fp16\n" |
| 35378 | "\n" |
| 35379 | "/**\n" |
| 35380 | " * Compute tan (PI * x).\n" |
| 35381 | " */\n" |
| 35382 | "float __ovld __cnfn tanpi(float x);\n" |
| 35383 | "float2 __ovld __cnfn tanpi(float2 x);\n" |
| 35384 | "float3 __ovld __cnfn tanpi(float3 x);\n" |
| 35385 | "float4 __ovld __cnfn tanpi(float4 x);\n" |
| 35386 | "float8 __ovld __cnfn tanpi(float8 x);\n" |
| 35387 | "float16 __ovld __cnfn tanpi(float16 x);\n" |
| 35388 | "#ifdef cl_khr_fp64\n" |
| 35389 | "double __ovld __cnfn tanpi(double x);\n" |
| 35390 | "double2 __ovld __cnfn tanpi(double2 x);\n" |
| 35391 | "double3 __ovld __cnfn tanpi(double3 x);\n" |
| 35392 | "double4 __ovld __cnfn tanpi(double4 x);\n" |
| 35393 | "double8 __ovld __cnfn tanpi(double8 x);\n" |
| 35394 | "double16 __ovld __cnfn tanpi(double16 x);\n" |
| 35395 | "#endif //cl_khr_fp64\n" |
| 35396 | "#ifdef cl_khr_fp16\n" |
| 35397 | "half __ovld __cnfn tanpi(half x);\n" |
| 35398 | "half2 __ovld __cnfn tanpi(half2 x);\n" |
| 35399 | "half3 __ovld __cnfn tanpi(half3 x);\n" |
| 35400 | "half4 __ovld __cnfn tanpi(half4 x);\n" |
| 35401 | "half8 __ovld __cnfn tanpi(half8 x);\n" |
| 35402 | "half16 __ovld __cnfn tanpi(half16 x);\n" |
| 35403 | "#endif //cl_khr_fp16\n" |
| 35404 | "\n" |
| 35405 | "/**\n" |
| 35406 | " * Compute the gamma function.\n" |
| 35407 | " */\n" |
| 35408 | "float __ovld __cnfn tgamma(float);\n" |
| 35409 | "float2 __ovld __cnfn tgamma(float2);\n" |
| 35410 | "float3 __ovld __cnfn tgamma(float3);\n" |
| 35411 | "float4 __ovld __cnfn tgamma(float4);\n" |
| 35412 | "float8 __ovld __cnfn tgamma(float8);\n" |
| 35413 | "float16 __ovld __cnfn tgamma(float16);\n" |
| 35414 | "#ifdef cl_khr_fp64\n" |
| 35415 | "double __ovld __cnfn tgamma(double);\n" |
| 35416 | "double2 __ovld __cnfn tgamma(double2);\n" |
| 35417 | "double3 __ovld __cnfn tgamma(double3);\n" |
| 35418 | "double4 __ovld __cnfn tgamma(double4);\n" |
| 35419 | "double8 __ovld __cnfn tgamma(double8);\n" |
| 35420 | "double16 __ovld __cnfn tgamma(double16);\n" |
| 35421 | "#endif //cl_khr_fp64\n" |
| 35422 | "#ifdef cl_khr_fp16\n" |
| 35423 | "half __ovld __cnfn tgamma(half);\n" |
| 35424 | "half2 __ovld __cnfn tgamma(half2);\n" |
| 35425 | "half3 __ovld __cnfn tgamma(half3);\n" |
| 35426 | "half4 __ovld __cnfn tgamma(half4);\n" |
| 35427 | "half8 __ovld __cnfn tgamma(half8);\n" |
| 35428 | "half16 __ovld __cnfn tgamma(half16);\n" |
| 35429 | "#endif //cl_khr_fp16\n" |
| 35430 | "\n" |
| 35431 | "/**\n" |
| 35432 | " * Round to integral value using the round to zero\n" |
| 35433 | " * rounding mode.\n" |
| 35434 | " */\n" |
| 35435 | "float __ovld __cnfn trunc(float);\n" |
| 35436 | "float2 __ovld __cnfn trunc(float2);\n" |
| 35437 | "float3 __ovld __cnfn trunc(float3);\n" |
| 35438 | "float4 __ovld __cnfn trunc(float4);\n" |
| 35439 | "float8 __ovld __cnfn trunc(float8);\n" |
| 35440 | "float16 __ovld __cnfn trunc(float16);\n" |
| 35441 | "#ifdef cl_khr_fp64\n" |
| 35442 | "double __ovld __cnfn trunc(double);\n" |
| 35443 | "double2 __ovld __cnfn trunc(double2);\n" |
| 35444 | "double3 __ovld __cnfn trunc(double3);\n" |
| 35445 | "double4 __ovld __cnfn trunc(double4);\n" |
| 35446 | "double8 __ovld __cnfn trunc(double8);\n" |
| 35447 | "double16 __ovld __cnfn trunc(double16);\n" |
| 35448 | "#endif //cl_khr_fp64\n" |
| 35449 | "#ifdef cl_khr_fp16\n" |
| 35450 | "half __ovld __cnfn trunc(half);\n" |
| 35451 | "half2 __ovld __cnfn trunc(half2);\n" |
| 35452 | "half3 __ovld __cnfn trunc(half3);\n" |
| 35453 | "half4 __ovld __cnfn trunc(half4);\n" |
| 35454 | "half8 __ovld __cnfn trunc(half8);\n" |
| 35455 | "half16 __ovld __cnfn trunc(half16);\n" |
| 35456 | "#endif //cl_khr_fp16\n" |
| 35457 | "\n" |
| 35458 | "/**\n" |
| 35459 | " * Compute cosine. x must be in the range -2^16 ... +2^16.\n" |
| 35460 | " */\n" |
| 35461 | "float __ovld __cnfn half_cos(float x);\n" |
| 35462 | "float2 __ovld __cnfn half_cos(float2 x);\n" |
| 35463 | "float3 __ovld __cnfn half_cos(float3 x);\n" |
| 35464 | "float4 __ovld __cnfn half_cos(float4 x);\n" |
| 35465 | "float8 __ovld __cnfn half_cos(float8 x);\n" |
| 35466 | "float16 __ovld __cnfn half_cos(float16 x);\n" |
| 35467 | "\n" |
| 35468 | "/**\n" |
| 35469 | " * Compute x / y.\n" |
| 35470 | " */\n" |
| 35471 | "float __ovld __cnfn half_divide(float x, float y);\n" |
| 35472 | "float2 __ovld __cnfn half_divide(float2 x, float2 y);\n" |
| 35473 | "float3 __ovld __cnfn half_divide(float3 x, float3 y);\n" |
| 35474 | "float4 __ovld __cnfn half_divide(float4 x, float4 y);\n" |
| 35475 | "float8 __ovld __cnfn half_divide(float8 x, float8 y);\n" |
| 35476 | "float16 __ovld __cnfn half_divide(float16 x, float16 y);\n" |
| 35477 | "\n" |
| 35478 | "/**\n" |
| 35479 | " * Compute the base-e exponential of x.\n" |
| 35480 | " */\n" |
| 35481 | "float __ovld __cnfn half_exp(float x);\n" |
| 35482 | "float2 __ovld __cnfn half_exp(float2 x);\n" |
| 35483 | "float3 __ovld __cnfn half_exp(float3 x);\n" |
| 35484 | "float4 __ovld __cnfn half_exp(float4 x);\n" |
| 35485 | "float8 __ovld __cnfn half_exp(float8 x);\n" |
| 35486 | "float16 __ovld __cnfn half_exp(float16 x);\n" |
| 35487 | "\n" |
| 35488 | "/**\n" |
| 35489 | " * Compute the base-2 exponential of x.\n" |
| 35490 | " */\n" |
| 35491 | "float __ovld __cnfn half_exp2(float x);\n" |
| 35492 | "float2 __ovld __cnfn half_exp2(float2 x);\n" |
| 35493 | "float3 __ovld __cnfn half_exp2(float3 x);\n" |
| 35494 | "float4 __ovld __cnfn half_exp2(float4 x);\n" |
| 35495 | "float8 __ovld __cnfn half_exp2(float8 x);\n" |
| 35496 | "float16 __ovld __cnfn half_exp2(float16 x);\n" |
| 35497 | "\n" |
| 35498 | "/**\n" |
| 35499 | " * Compute the base-10 exponential of x.\n" |
| 35500 | " */\n" |
| 35501 | "float __ovld __cnfn half_exp10(float x);\n" |
| 35502 | "float2 __ovld __cnfn half_exp10(float2 x);\n" |
| 35503 | "float3 __ovld __cnfn half_exp10(float3 x);\n" |
| 35504 | "float4 __ovld __cnfn half_exp10(float4 x);\n" |
| 35505 | "float8 __ovld __cnfn half_exp10(float8 x);\n" |
| 35506 | "float16 __ovld __cnfn half_exp10(float16 x);\n" |
| 35507 | "\n" |
| 35508 | "/**\n" |
| 35509 | " * Compute natural logarithm.\n" |
| 35510 | " */\n" |
| 35511 | "float __ovld __cnfn half_log(float x);\n" |
| 35512 | "float2 __ovld __cnfn half_log(float2 x);\n" |
| 35513 | "float3 __ovld __cnfn half_log(float3 x);\n" |
| 35514 | "float4 __ovld __cnfn half_log(float4 x);\n" |
| 35515 | "float8 __ovld __cnfn half_log(float8 x);\n" |
| 35516 | "float16 __ovld __cnfn half_log(float16 x);\n" |
| 35517 | "\n" |
| 35518 | "/**\n" |
| 35519 | " * Compute a base 2 logarithm.\n" |
| 35520 | " */\n" |
| 35521 | "float __ovld __cnfn half_log2(float x);\n" |
| 35522 | "float2 __ovld __cnfn half_log2(float2 x);\n" |
| 35523 | "float3 __ovld __cnfn half_log2(float3 x);\n" |
| 35524 | "float4 __ovld __cnfn half_log2(float4 x);\n" |
| 35525 | "float8 __ovld __cnfn half_log2(float8 x);\n" |
| 35526 | "float16 __ovld __cnfn half_log2(float16 x);\n" |
| 35527 | "\n" |
| 35528 | "/**\n" |
| 35529 | " * Compute a base 10 logarithm.\n" |
| 35530 | " */\n" |
| 35531 | "float __ovld __cnfn half_log10(float x);\n" |
| 35532 | "float2 __ovld __cnfn half_log10(float2 x);\n" |
| 35533 | "float3 __ovld __cnfn half_log10(float3 x);\n" |
| 35534 | "float4 __ovld __cnfn half_log10(float4 x);\n" |
| 35535 | "float8 __ovld __cnfn half_log10(float8 x);\n" |
| 35536 | "float16 __ovld __cnfn half_log10(float16 x);\n" |
| 35537 | "\n" |
| 35538 | "/**\n" |
| 35539 | " * Compute x to the power y, where x is >= 0.\n" |
| 35540 | " */\n" |
| 35541 | "float __ovld __cnfn half_powr(float x, float y);\n" |
| 35542 | "float2 __ovld __cnfn half_powr(float2 x, float2 y);\n" |
| 35543 | "float3 __ovld __cnfn half_powr(float3 x, float3 y);\n" |
| 35544 | "float4 __ovld __cnfn half_powr(float4 x, float4 y);\n" |
| 35545 | "float8 __ovld __cnfn half_powr(float8 x, float8 y);\n" |
| 35546 | "float16 __ovld __cnfn half_powr(float16 x, float16 y);\n" |
| 35547 | "\n" |
| 35548 | "/**\n" |
| 35549 | " * Compute reciprocal.\n" |
| 35550 | " */\n" |
| 35551 | "float __ovld __cnfn half_recip(float x);\n" |
| 35552 | "float2 __ovld __cnfn half_recip(float2 x);\n" |
| 35553 | "float3 __ovld __cnfn half_recip(float3 x);\n" |
| 35554 | "float4 __ovld __cnfn half_recip(float4 x);\n" |
| 35555 | "float8 __ovld __cnfn half_recip(float8 x);\n" |
| 35556 | "float16 __ovld __cnfn half_recip(float16 x);\n" |
| 35557 | "\n" |
| 35558 | "/**\n" |
| 35559 | " * Compute inverse square root.\n" |
| 35560 | " */\n" |
| 35561 | "float __ovld __cnfn half_rsqrt(float x);\n" |
| 35562 | "float2 __ovld __cnfn half_rsqrt(float2 x);\n" |
| 35563 | "float3 __ovld __cnfn half_rsqrt(float3 x);\n" |
| 35564 | "float4 __ovld __cnfn half_rsqrt(float4 x);\n" |
| 35565 | "float8 __ovld __cnfn half_rsqrt(float8 x);\n" |
| 35566 | "float16 __ovld __cnfn half_rsqrt(float16 x);\n" |
| 35567 | "\n" |
| 35568 | "/**\n" |
| 35569 | " * Compute sine. x must be in the range -2^16 ... +2^16.\n" |
| 35570 | " */\n" |
| 35571 | "float __ovld __cnfn half_sin(float x);\n" |
| 35572 | "float2 __ovld __cnfn half_sin(float2 x);\n" |
| 35573 | "float3 __ovld __cnfn half_sin(float3 x);\n" |
| 35574 | "float4 __ovld __cnfn half_sin(float4 x);\n" |
| 35575 | "float8 __ovld __cnfn half_sin(float8 x);\n" |
| 35576 | "float16 __ovld __cnfn half_sin(float16 x);\n" |
| 35577 | "\n" |
| 35578 | "/**\n" |
| 35579 | " * Compute square root.\n" |
| 35580 | " */\n" |
| 35581 | "float __ovld __cnfn half_sqrt(float x);\n" |
| 35582 | "float2 __ovld __cnfn half_sqrt(float2 x);\n" |
| 35583 | "float3 __ovld __cnfn half_sqrt(float3 x);\n" |
| 35584 | "float4 __ovld __cnfn half_sqrt(float4 x);\n" |
| 35585 | "float8 __ovld __cnfn half_sqrt(float8 x);\n" |
| 35586 | "float16 __ovld __cnfn half_sqrt(float16 x);\n" |
| 35587 | "\n" |
| 35588 | "/**\n" |
| 35589 | " * Compute tangent. x must be in the range -2^16 ... +2^16.\n" |
| 35590 | " */\n" |
| 35591 | "float __ovld __cnfn half_tan(float x);\n" |
| 35592 | "float2 __ovld __cnfn half_tan(float2 x);\n" |
| 35593 | "float3 __ovld __cnfn half_tan(float3 x);\n" |
| 35594 | "float4 __ovld __cnfn half_tan(float4 x);\n" |
| 35595 | "float8 __ovld __cnfn half_tan(float8 x);\n" |
| 35596 | "float16 __ovld __cnfn half_tan(float16 x);\n" |
| 35597 | "\n" |
| 35598 | "/**\n" |
| 35599 | " * Compute cosine over an implementation-defined range.\n" |
| 35600 | " * The maximum error is implementation-defined.\n" |
| 35601 | " */\n" |
| 35602 | "float __ovld __cnfn native_cos(float x);\n" |
| 35603 | "float2 __ovld __cnfn native_cos(float2 x);\n" |
| 35604 | "float3 __ovld __cnfn native_cos(float3 x);\n" |
| 35605 | "float4 __ovld __cnfn native_cos(float4 x);\n" |
| 35606 | "float8 __ovld __cnfn native_cos(float8 x);\n" |
| 35607 | "float16 __ovld __cnfn native_cos(float16 x);\n" |
| 35608 | "\n" |
| 35609 | "/**\n" |
| 35610 | " * Compute x / y over an implementation-defined range.\n" |
| 35611 | " * The maximum error is implementation-defined.\n" |
| 35612 | " */\n" |
| 35613 | "float __ovld __cnfn native_divide(float x, float y);\n" |
| 35614 | "float2 __ovld __cnfn native_divide(float2 x, float2 y);\n" |
| 35615 | "float3 __ovld __cnfn native_divide(float3 x, float3 y);\n" |
| 35616 | "float4 __ovld __cnfn native_divide(float4 x, float4 y);\n" |
| 35617 | "float8 __ovld __cnfn native_divide(float8 x, float8 y);\n" |
| 35618 | "float16 __ovld __cnfn native_divide(float16 x, float16 y);\n" |
| 35619 | "\n" |
| 35620 | "/**\n" |
| 35621 | " * Compute the base-e exponential of x over an\n" |
| 35622 | " * implementation-defined range. The maximum error is\n" |
| 35623 | " * implementation-defined.\n" |
| 35624 | " */\n" |
| 35625 | "float __ovld __cnfn native_exp(float x);\n" |
| 35626 | "float2 __ovld __cnfn native_exp(float2 x);\n" |
| 35627 | "float3 __ovld __cnfn native_exp(float3 x);\n" |
| 35628 | "float4 __ovld __cnfn native_exp(float4 x);\n" |
| 35629 | "float8 __ovld __cnfn native_exp(float8 x);\n" |
| 35630 | "float16 __ovld __cnfn native_exp(float16 x);\n" |
| 35631 | "\n" |
| 35632 | "/**\n" |
| 35633 | " * Compute the base-2 exponential of x over an\n" |
| 35634 | " * implementation-defined range. The maximum error is\n" |
| 35635 | " * implementation-defined.\n" |
| 35636 | " */\n" |
| 35637 | "float __ovld __cnfn native_exp2(float x);\n" |
| 35638 | "float2 __ovld __cnfn native_exp2(float2 x);\n" |
| 35639 | "float3 __ovld __cnfn native_exp2(float3 x);\n" |
| 35640 | "float4 __ovld __cnfn native_exp2(float4 x);\n" |
| 35641 | "float8 __ovld __cnfn native_exp2(float8 x);\n" |
| 35642 | "float16 __ovld __cnfn native_exp2(float16 x);\n" |
| 35643 | "\n" |
| 35644 | "/**\n" |
| 35645 | " * Compute the base-10 exponential of x over an\n" |
| 35646 | " * implementation-defined range. The maximum error is\n" |
| 35647 | " * implementation-defined.\n" |
| 35648 | " */\n" |
| 35649 | "float __ovld __cnfn native_exp10(float x);\n" |
| 35650 | "float2 __ovld __cnfn native_exp10(float2 x);\n" |
| 35651 | "float3 __ovld __cnfn native_exp10(float3 x);\n" |
| 35652 | "float4 __ovld __cnfn native_exp10(float4 x);\n" |
| 35653 | "float8 __ovld __cnfn native_exp10(float8 x);\n" |
| 35654 | "float16 __ovld __cnfn native_exp10(float16 x);\n" |
| 35655 | "\n" |
| 35656 | "/**\n" |
| 35657 | " * Compute natural logarithm over an implementation-defined\n" |
| 35658 | " * range. The maximum error is\n" |
| 35659 | " * implementation-defined.\n" |
| 35660 | " */\n" |
| 35661 | "float __ovld __cnfn native_log(float x);\n" |
| 35662 | "float2 __ovld __cnfn native_log(float2 x);\n" |
| 35663 | "float3 __ovld __cnfn native_log(float3 x);\n" |
| 35664 | "float4 __ovld __cnfn native_log(float4 x);\n" |
| 35665 | "float8 __ovld __cnfn native_log(float8 x);\n" |
| 35666 | "float16 __ovld __cnfn native_log(float16 x);\n" |
| 35667 | "\n" |
| 35668 | "/**\n" |
| 35669 | " * Compute a base 2 logarithm over an implementation-defined\n" |
| 35670 | " * range. The maximum error is implementation-defined.\n" |
| 35671 | " */\n" |
| 35672 | "float __ovld __cnfn native_log2(float x);\n" |
| 35673 | "float2 __ovld __cnfn native_log2(float2 x);\n" |
| 35674 | "float3 __ovld __cnfn native_log2(float3 x);\n" |
| 35675 | "float4 __ovld __cnfn native_log2(float4 x);\n" |
| 35676 | "float8 __ovld __cnfn native_log2(float8 x);\n" |
| 35677 | "float16 __ovld __cnfn native_log2(float16 x);\n" |
| 35678 | "\n" |
| 35679 | "/**\n" |
| 35680 | " * Compute a base 10 logarithm over an implementation-defined\n" |
| 35681 | " * range. The maximum error is implementation-defined.\n" |
| 35682 | " */\n" |
| 35683 | "float __ovld __cnfn native_log10(float x);\n" |
| 35684 | "float2 __ovld __cnfn native_log10(float2 x);\n" |
| 35685 | "float3 __ovld __cnfn native_log10(float3 x);\n" |
| 35686 | "float4 __ovld __cnfn native_log10(float4 x);\n" |
| 35687 | "float8 __ovld __cnfn native_log10(float8 x);\n" |
| 35688 | "float16 __ovld __cnfn native_log10(float16 x);\n" |
| 35689 | "\n" |
| 35690 | "/**\n" |
| 35691 | " * Compute x to the power y, where x is >= 0. The range of\n" |
| 35692 | " * x and y are implementation-defined. The maximum error\n" |
| 35693 | " * is implementation-defined.\n" |
| 35694 | " */\n" |
| 35695 | "float __ovld __cnfn native_powr(float x, float y);\n" |
| 35696 | "float2 __ovld __cnfn native_powr(float2 x, float2 y);\n" |
| 35697 | "float3 __ovld __cnfn native_powr(float3 x, float3 y);\n" |
| 35698 | "float4 __ovld __cnfn native_powr(float4 x, float4 y);\n" |
| 35699 | "float8 __ovld __cnfn native_powr(float8 x, float8 y);\n" |
| 35700 | "float16 __ovld __cnfn native_powr(float16 x, float16 y);\n" |
| 35701 | "\n" |
| 35702 | "/**\n" |
| 35703 | " * Compute reciprocal over an implementation-defined\n" |
| 35704 | " * range. The maximum error is implementation-defined.\n" |
| 35705 | " */\n" |
| 35706 | "float __ovld __cnfn native_recip(float x);\n" |
| 35707 | "float2 __ovld __cnfn native_recip(float2 x);\n" |
| 35708 | "float3 __ovld __cnfn native_recip(float3 x);\n" |
| 35709 | "float4 __ovld __cnfn native_recip(float4 x);\n" |
| 35710 | "float8 __ovld __cnfn native_recip(float8 x);\n" |
| 35711 | "float16 __ovld __cnfn native_recip(float16 x);\n" |
| 35712 | "\n" |
| 35713 | "/**\n" |
| 35714 | " * Compute inverse square root over an implementation-defined\n" |
| 35715 | " * range. The maximum error is implementation-defined.\n" |
| 35716 | " */\n" |
| 35717 | "float __ovld __cnfn native_rsqrt(float x);\n" |
| 35718 | "float2 __ovld __cnfn native_rsqrt(float2 x);\n" |
| 35719 | "float3 __ovld __cnfn native_rsqrt(float3 x);\n" |
| 35720 | "float4 __ovld __cnfn native_rsqrt(float4 x);\n" |
| 35721 | "float8 __ovld __cnfn native_rsqrt(float8 x);\n" |
| 35722 | "float16 __ovld __cnfn native_rsqrt(float16 x);\n" |
| 35723 | "\n" |
| 35724 | "/**\n" |
| 35725 | " * Compute sine over an implementation-defined range.\n" |
| 35726 | " * The maximum error is implementation-defined.\n" |
| 35727 | " */\n" |
| 35728 | "float __ovld __cnfn native_sin(float x);\n" |
| 35729 | "float2 __ovld __cnfn native_sin(float2 x);\n" |
| 35730 | "float3 __ovld __cnfn native_sin(float3 x);\n" |
| 35731 | "float4 __ovld __cnfn native_sin(float4 x);\n" |
| 35732 | "float8 __ovld __cnfn native_sin(float8 x);\n" |
| 35733 | "float16 __ovld __cnfn native_sin(float16 x);\n" |
| 35734 | "\n" |
| 35735 | "/**\n" |
| 35736 | " * Compute square root over an implementation-defined\n" |
| 35737 | " * range. The maximum error is implementation-defined.\n" |
| 35738 | " */\n" |
| 35739 | "float __ovld __cnfn native_sqrt(float x);\n" |
| 35740 | "float2 __ovld __cnfn native_sqrt(float2 x);\n" |
| 35741 | "float3 __ovld __cnfn native_sqrt(float3 x);\n" |
| 35742 | "float4 __ovld __cnfn native_sqrt(float4 x);\n" |
| 35743 | "float8 __ovld __cnfn native_sqrt(float8 x);\n" |
| 35744 | "float16 __ovld __cnfn native_sqrt(float16 x);\n" |
| 35745 | "\n" |
| 35746 | "/**\n" |
| 35747 | " * Compute tangent over an implementation-defined range.\n" |
| 35748 | " * The maximum error is implementation-defined.\n" |
| 35749 | " */\n" |
| 35750 | "float __ovld __cnfn native_tan(float x);\n" |
| 35751 | "float2 __ovld __cnfn native_tan(float2 x);\n" |
| 35752 | "float3 __ovld __cnfn native_tan(float3 x);\n" |
| 35753 | "float4 __ovld __cnfn native_tan(float4 x);\n" |
| 35754 | "float8 __ovld __cnfn native_tan(float8 x);\n" |
| 35755 | "float16 __ovld __cnfn native_tan(float16 x);\n" |
| 35756 | "\n" |
| 35757 | "// OpenCL v1.1 s6.11.3, v1.2 s6.12.3, v2.0 s6.13.3 - Integer Functions\n" |
| 35758 | "\n" |
| 35759 | "/**\n" |
| 35760 | " * Returns | x |.\n" |
| 35761 | " */\n" |
| 35762 | "uchar __ovld __cnfn abs(char x);\n" |
| 35763 | "uchar __ovld __cnfn abs(uchar x);\n" |
| 35764 | "uchar2 __ovld __cnfn abs(char2 x);\n" |
| 35765 | "uchar2 __ovld __cnfn abs(uchar2 x);\n" |
| 35766 | "uchar3 __ovld __cnfn abs(char3 x);\n" |
| 35767 | "uchar3 __ovld __cnfn abs(uchar3 x);\n" |
| 35768 | "uchar4 __ovld __cnfn abs(char4 x);\n" |
| 35769 | "uchar4 __ovld __cnfn abs(uchar4 x);\n" |
| 35770 | "uchar8 __ovld __cnfn abs(char8 x);\n" |
| 35771 | "uchar8 __ovld __cnfn abs(uchar8 x);\n" |
| 35772 | "uchar16 __ovld __cnfn abs(char16 x);\n" |
| 35773 | "uchar16 __ovld __cnfn abs(uchar16 x);\n" |
| 35774 | "ushort __ovld __cnfn abs(short x);\n" |
| 35775 | "ushort __ovld __cnfn abs(ushort x);\n" |
| 35776 | "ushort2 __ovld __cnfn abs(short2 x);\n" |
| 35777 | "ushort2 __ovld __cnfn abs(ushort2 x);\n" |
| 35778 | "ushort3 __ovld __cnfn abs(short3 x);\n" |
| 35779 | "ushort3 __ovld __cnfn abs(ushort3 x);\n" |
| 35780 | "ushort4 __ovld __cnfn abs(short4 x);\n" |
| 35781 | "ushort4 __ovld __cnfn abs(ushort4 x);\n" |
| 35782 | "ushort8 __ovld __cnfn abs(short8 x);\n" |
| 35783 | "ushort8 __ovld __cnfn abs(ushort8 x);\n" |
| 35784 | "ushort16 __ovld __cnfn abs(short16 x);\n" |
| 35785 | "ushort16 __ovld __cnfn abs(ushort16 x);\n" |
| 35786 | "uint __ovld __cnfn abs(int x);\n" |
| 35787 | "uint __ovld __cnfn abs(uint x);\n" |
| 35788 | "uint2 __ovld __cnfn abs(int2 x);\n" |
| 35789 | "uint2 __ovld __cnfn abs(uint2 x);\n" |
| 35790 | "uint3 __ovld __cnfn abs(int3 x);\n" |
| 35791 | "uint3 __ovld __cnfn abs(uint3 x);\n" |
| 35792 | "uint4 __ovld __cnfn abs(int4 x);\n" |
| 35793 | "uint4 __ovld __cnfn abs(uint4 x);\n" |
| 35794 | "uint8 __ovld __cnfn abs(int8 x);\n" |
| 35795 | "uint8 __ovld __cnfn abs(uint8 x);\n" |
| 35796 | "uint16 __ovld __cnfn abs(int16 x);\n" |
| 35797 | "uint16 __ovld __cnfn abs(uint16 x);\n" |
| 35798 | "ulong __ovld __cnfn abs(long x);\n" |
| 35799 | "ulong __ovld __cnfn abs(ulong x);\n" |
| 35800 | "ulong2 __ovld __cnfn abs(long2 x);\n" |
| 35801 | "ulong2 __ovld __cnfn abs(ulong2 x);\n" |
| 35802 | "ulong3 __ovld __cnfn abs(long3 x);\n" |
| 35803 | "ulong3 __ovld __cnfn abs(ulong3 x);\n" |
| 35804 | "ulong4 __ovld __cnfn abs(long4 x);\n" |
| 35805 | "ulong4 __ovld __cnfn abs(ulong4 x);\n" |
| 35806 | "ulong8 __ovld __cnfn abs(long8 x);\n" |
| 35807 | "ulong8 __ovld __cnfn abs(ulong8 x);\n" |
| 35808 | "ulong16 __ovld __cnfn abs(long16 x);\n" |
| 35809 | "ulong16 __ovld __cnfn abs(ulong16 x);\n" |
| 35810 | "\n" |
| 35811 | "/**\n" |
| 35812 | " * Returns | x - y | without modulo overflow.\n" |
| 35813 | " */\n" |
| 35814 | "uchar __ovld __cnfn abs_diff(char x, char y);\n" |
| 35815 | "uchar __ovld __cnfn abs_diff(uchar x, uchar y);\n" |
| 35816 | "uchar2 __ovld __cnfn abs_diff(char2 x, char2 y);\n" |
| 35817 | "uchar2 __ovld __cnfn abs_diff(uchar2 x, uchar2 y);\n" |
| 35818 | "uchar3 __ovld __cnfn abs_diff(char3 x, char3 y);\n" |
| 35819 | "uchar3 __ovld __cnfn abs_diff(uchar3 x, uchar3 y);\n" |
| 35820 | "uchar4 __ovld __cnfn abs_diff(char4 x, char4 y);\n" |
| 35821 | "uchar4 __ovld __cnfn abs_diff(uchar4 x, uchar4 y);\n" |
| 35822 | "uchar8 __ovld __cnfn abs_diff(char8 x, char8 y);\n" |
| 35823 | "uchar8 __ovld __cnfn abs_diff(uchar8 x, uchar8 y);\n" |
| 35824 | "uchar16 __ovld __cnfn abs_diff(char16 x, char16 y);\n" |
| 35825 | "uchar16 __ovld __cnfn abs_diff(uchar16 x, uchar16 y);\n" |
| 35826 | "ushort __ovld __cnfn abs_diff(short x, short y);\n" |
| 35827 | "ushort __ovld __cnfn abs_diff(ushort x, ushort y);\n" |
| 35828 | "ushort2 __ovld __cnfn abs_diff(short2 x, short2 y);\n" |
| 35829 | "ushort2 __ovld __cnfn abs_diff(ushort2 x, ushort2 y);\n" |
| 35830 | "ushort3 __ovld __cnfn abs_diff(short3 x, short3 y);\n" |
| 35831 | "ushort3 __ovld __cnfn abs_diff(ushort3 x, ushort3 y);\n" |
| 35832 | "ushort4 __ovld __cnfn abs_diff(short4 x, short4 y);\n" |
| 35833 | "ushort4 __ovld __cnfn abs_diff(ushort4 x, ushort4 y);\n" |
| 35834 | "ushort8 __ovld __cnfn abs_diff(short8 x, short8 y);\n" |
| 35835 | "ushort8 __ovld __cnfn abs_diff(ushort8 x, ushort8 y);\n" |
| 35836 | "ushort16 __ovld __cnfn abs_diff(short16 x, short16 y);\n" |
| 35837 | "ushort16 __ovld __cnfn abs_diff(ushort16 x, ushort16 y);\n" |
| 35838 | "uint __ovld __cnfn abs_diff(int x, int y);\n" |
| 35839 | "uint __ovld __cnfn abs_diff(uint x, uint y);\n" |
| 35840 | "uint2 __ovld __cnfn abs_diff(int2 x, int2 y);\n" |
| 35841 | "uint2 __ovld __cnfn abs_diff(uint2 x, uint2 y);\n" |
| 35842 | "uint3 __ovld __cnfn abs_diff(int3 x, int3 y);\n" |
| 35843 | "uint3 __ovld __cnfn abs_diff(uint3 x, uint3 y);\n" |
| 35844 | "uint4 __ovld __cnfn abs_diff(int4 x, int4 y);\n" |
| 35845 | "uint4 __ovld __cnfn abs_diff(uint4 x, uint4 y);\n" |
| 35846 | "uint8 __ovld __cnfn abs_diff(int8 x, int8 y);\n" |
| 35847 | "uint8 __ovld __cnfn abs_diff(uint8 x, uint8 y);\n" |
| 35848 | "uint16 __ovld __cnfn abs_diff(int16 x, int16 y);\n" |
| 35849 | "uint16 __ovld __cnfn abs_diff(uint16 x, uint16 y);\n" |
| 35850 | "ulong __ovld __cnfn abs_diff(long x, long y);\n" |
| 35851 | "ulong __ovld __cnfn abs_diff(ulong x, ulong y);\n" |
| 35852 | "ulong2 __ovld __cnfn abs_diff(long2 x, long2 y);\n" |
| 35853 | "ulong2 __ovld __cnfn abs_diff(ulong2 x, ulong2 y);\n" |
| 35854 | "ulong3 __ovld __cnfn abs_diff(long3 x, long3 y);\n" |
| 35855 | "ulong3 __ovld __cnfn abs_diff(ulong3 x, ulong3 y);\n" |
| 35856 | "ulong4 __ovld __cnfn abs_diff(long4 x, long4 y);\n" |
| 35857 | "ulong4 __ovld __cnfn abs_diff(ulong4 x, ulong4 y);\n" |
| 35858 | "ulong8 __ovld __cnfn abs_diff(long8 x, long8 y);\n" |
| 35859 | "ulong8 __ovld __cnfn abs_diff(ulong8 x, ulong8 y);\n" |
| 35860 | "ulong16 __ovld __cnfn abs_diff(long16 x, long16 y);\n" |
| 35861 | "ulong16 __ovld __cnfn abs_diff(ulong16 x, ulong16 y);\n" |
| 35862 | "\n" |
| 35863 | "/**\n" |
| 35864 | " * Returns x + y and saturates the result.\n" |
| 35865 | " */\n" |
| 35866 | "char __ovld __cnfn add_sat(char x, char y);\n" |
| 35867 | "uchar __ovld __cnfn add_sat(uchar x, uchar y);\n" |
| 35868 | "char2 __ovld __cnfn add_sat(char2 x, char2 y);\n" |
| 35869 | "uchar2 __ovld __cnfn add_sat(uchar2 x, uchar2 y);\n" |
| 35870 | "char3 __ovld __cnfn add_sat(char3 x, char3 y);\n" |
| 35871 | "uchar3 __ovld __cnfn add_sat(uchar3 x, uchar3 y);\n" |
| 35872 | "char4 __ovld __cnfn add_sat(char4 x, char4 y);\n" |
| 35873 | "uchar4 __ovld __cnfn add_sat(uchar4 x, uchar4 y);\n" |
| 35874 | "char8 __ovld __cnfn add_sat(char8 x, char8 y);\n" |
| 35875 | "uchar8 __ovld __cnfn add_sat(uchar8 x, uchar8 y);\n" |
| 35876 | "char16 __ovld __cnfn add_sat(char16 x, char16 y);\n" |
| 35877 | "uchar16 __ovld __cnfn add_sat(uchar16 x, uchar16 y);\n" |
| 35878 | "short __ovld __cnfn add_sat(short x, short y);\n" |
| 35879 | "ushort __ovld __cnfn add_sat(ushort x, ushort y);\n" |
| 35880 | "short2 __ovld __cnfn add_sat(short2 x, short2 y);\n" |
| 35881 | "ushort2 __ovld __cnfn add_sat(ushort2 x, ushort2 y);\n" |
| 35882 | "short3 __ovld __cnfn add_sat(short3 x, short3 y);\n" |
| 35883 | "ushort3 __ovld __cnfn add_sat(ushort3 x, ushort3 y);\n" |
| 35884 | "short4 __ovld __cnfn add_sat(short4 x, short4 y);\n" |
| 35885 | "ushort4 __ovld __cnfn add_sat(ushort4 x, ushort4 y);\n" |
| 35886 | "short8 __ovld __cnfn add_sat(short8 x, short8 y);\n" |
| 35887 | "ushort8 __ovld __cnfn add_sat(ushort8 x, ushort8 y);\n" |
| 35888 | "short16 __ovld __cnfn add_sat(short16 x, short16 y);\n" |
| 35889 | "ushort16 __ovld __cnfn add_sat(ushort16 x, ushort16 y);\n" |
| 35890 | "int __ovld __cnfn add_sat(int x, int y);\n" |
| 35891 | "uint __ovld __cnfn add_sat(uint x, uint y);\n" |
| 35892 | "int2 __ovld __cnfn add_sat(int2 x, int2 y);\n" |
| 35893 | "uint2 __ovld __cnfn add_sat(uint2 x, uint2 y);\n" |
| 35894 | "int3 __ovld __cnfn add_sat(int3 x, int3 y);\n" |
| 35895 | "uint3 __ovld __cnfn add_sat(uint3 x, uint3 y);\n" |
| 35896 | "int4 __ovld __cnfn add_sat(int4 x, int4 y);\n" |
| 35897 | "uint4 __ovld __cnfn add_sat(uint4 x, uint4 y);\n" |
| 35898 | "int8 __ovld __cnfn add_sat(int8 x, int8 y);\n" |
| 35899 | "uint8 __ovld __cnfn add_sat(uint8 x, uint8 y);\n" |
| 35900 | "int16 __ovld __cnfn add_sat(int16 x, int16 y);\n" |
| 35901 | "uint16 __ovld __cnfn add_sat(uint16 x, uint16 y);\n" |
| 35902 | "long __ovld __cnfn add_sat(long x, long y);\n" |
| 35903 | "ulong __ovld __cnfn add_sat(ulong x, ulong y);\n" |
| 35904 | "long2 __ovld __cnfn add_sat(long2 x, long2 y);\n" |
| 35905 | "ulong2 __ovld __cnfn add_sat(ulong2 x, ulong2 y);\n" |
| 35906 | "long3 __ovld __cnfn add_sat(long3 x, long3 y);\n" |
| 35907 | "ulong3 __ovld __cnfn add_sat(ulong3 x, ulong3 y);\n" |
| 35908 | "long4 __ovld __cnfn add_sat(long4 x, long4 y);\n" |
| 35909 | "ulong4 __ovld __cnfn add_sat(ulong4 x, ulong4 y);\n" |
| 35910 | "long8 __ovld __cnfn add_sat(long8 x, long8 y);\n" |
| 35911 | "ulong8 __ovld __cnfn add_sat(ulong8 x, ulong8 y);\n" |
| 35912 | "long16 __ovld __cnfn add_sat(long16 x, long16 y);\n" |
| 35913 | "ulong16 __ovld __cnfn add_sat(ulong16 x, ulong16 y);\n" |
| 35914 | "\n" |
| 35915 | "/**\n" |
| 35916 | " * Returns (x + y) >> 1. The intermediate sum does\n" |
| 35917 | " * not modulo overflow.\n" |
| 35918 | " */\n" |
| 35919 | "char __ovld __cnfn hadd(char x, char y);\n" |
| 35920 | "uchar __ovld __cnfn hadd(uchar x, uchar y);\n" |
| 35921 | "char2 __ovld __cnfn hadd(char2 x, char2 y);\n" |
| 35922 | "uchar2 __ovld __cnfn hadd(uchar2 x, uchar2 y);\n" |
| 35923 | "char3 __ovld __cnfn hadd(char3 x, char3 y);\n" |
| 35924 | "uchar3 __ovld __cnfn hadd(uchar3 x, uchar3 y);\n" |
| 35925 | "char4 __ovld __cnfn hadd(char4 x, char4 y);\n" |
| 35926 | "uchar4 __ovld __cnfn hadd(uchar4 x, uchar4 y);\n" |
| 35927 | "char8 __ovld __cnfn hadd(char8 x, char8 y);\n" |
| 35928 | "uchar8 __ovld __cnfn hadd(uchar8 x, uchar8 y);\n" |
| 35929 | "char16 __ovld __cnfn hadd(char16 x, char16 y);\n" |
| 35930 | "uchar16 __ovld __cnfn hadd(uchar16 x, uchar16 y);\n" |
| 35931 | "short __ovld __cnfn hadd(short x, short y);\n" |
| 35932 | "ushort __ovld __cnfn hadd(ushort x, ushort y);\n" |
| 35933 | "short2 __ovld __cnfn hadd(short2 x, short2 y);\n" |
| 35934 | "ushort2 __ovld __cnfn hadd(ushort2 x, ushort2 y);\n" |
| 35935 | "short3 __ovld __cnfn hadd(short3 x, short3 y);\n" |
| 35936 | "ushort3 __ovld __cnfn hadd(ushort3 x, ushort3 y);\n" |
| 35937 | "short4 __ovld __cnfn hadd(short4 x, short4 y);\n" |
| 35938 | "ushort4 __ovld __cnfn hadd(ushort4 x, ushort4 y);\n" |
| 35939 | "short8 __ovld __cnfn hadd(short8 x, short8 y);\n" |
| 35940 | "ushort8 __ovld __cnfn hadd(ushort8 x, ushort8 y);\n" |
| 35941 | "short16 __ovld __cnfn hadd(short16 x, short16 y);\n" |
| 35942 | "ushort16 __ovld __cnfn hadd(ushort16 x, ushort16 y);\n" |
| 35943 | "int __ovld __cnfn hadd(int x, int y);\n" |
| 35944 | "uint __ovld __cnfn hadd(uint x, uint y);\n" |
| 35945 | "int2 __ovld __cnfn hadd(int2 x, int2 y);\n" |
| 35946 | "uint2 __ovld __cnfn hadd(uint2 x, uint2 y);\n" |
| 35947 | "int3 __ovld __cnfn hadd(int3 x, int3 y);\n" |
| 35948 | "uint3 __ovld __cnfn hadd(uint3 x, uint3 y);\n" |
| 35949 | "int4 __ovld __cnfn hadd(int4 x, int4 y);\n" |
| 35950 | "uint4 __ovld __cnfn hadd(uint4 x, uint4 y);\n" |
| 35951 | "int8 __ovld __cnfn hadd(int8 x, int8 y);\n" |
| 35952 | "uint8 __ovld __cnfn hadd(uint8 x, uint8 y);\n" |
| 35953 | "int16 __ovld __cnfn hadd(int16 x, int16 y);\n" |
| 35954 | "uint16 __ovld __cnfn hadd(uint16 x, uint16 y);\n" |
| 35955 | "long __ovld __cnfn hadd(long x, long y);\n" |
| 35956 | "ulong __ovld __cnfn hadd(ulong x, ulong y);\n" |
| 35957 | "long2 __ovld __cnfn hadd(long2 x, long2 y);\n" |
| 35958 | "ulong2 __ovld __cnfn hadd(ulong2 x, ulong2 y);\n" |
| 35959 | "long3 __ovld __cnfn hadd(long3 x, long3 y);\n" |
| 35960 | "ulong3 __ovld __cnfn hadd(ulong3 x, ulong3 y);\n" |
| 35961 | "long4 __ovld __cnfn hadd(long4 x, long4 y);\n" |
| 35962 | "ulong4 __ovld __cnfn hadd(ulong4 x, ulong4 y);\n" |
| 35963 | "long8 __ovld __cnfn hadd(long8 x, long8 y);\n" |
| 35964 | "ulong8 __ovld __cnfn hadd(ulong8 x, ulong8 y);\n" |
| 35965 | "long16 __ovld __cnfn hadd(long16 x, long16 y);\n" |
| 35966 | "ulong16 __ovld __cnfn hadd(ulong16 x, ulong16 y);\n" |
| 35967 | "\n" |
| 35968 | "/**\n" |
| 35969 | " * Returns (x + y + 1) >> 1. The intermediate sum\n" |
| 35970 | " * does not modulo overflow.\n" |
| 35971 | " */\n" |
| 35972 | "char __ovld __cnfn rhadd(char x, char y);\n" |
| 35973 | "uchar __ovld __cnfn rhadd(uchar x, uchar y);\n" |
| 35974 | "char2 __ovld __cnfn rhadd(char2 x, char2 y);\n" |
| 35975 | "uchar2 __ovld __cnfn rhadd(uchar2 x, uchar2 y);\n" |
| 35976 | "char3 __ovld __cnfn rhadd(char3 x, char3 y);\n" |
| 35977 | "uchar3 __ovld __cnfn rhadd(uchar3 x, uchar3 y);\n" |
| 35978 | "char4 __ovld __cnfn rhadd(char4 x, char4 y);\n" |
| 35979 | "uchar4 __ovld __cnfn rhadd(uchar4 x, uchar4 y);\n" |
| 35980 | "char8 __ovld __cnfn rhadd(char8 x, char8 y);\n" |
| 35981 | "uchar8 __ovld __cnfn rhadd(uchar8 x, uchar8 y);\n" |
| 35982 | "char16 __ovld __cnfn rhadd(char16 x, char16 y);\n" |
| 35983 | "uchar16 __ovld __cnfn rhadd(uchar16 x, uchar16 y);\n" |
| 35984 | "short __ovld __cnfn rhadd(short x, short y);\n" |
| 35985 | "ushort __ovld __cnfn rhadd(ushort x, ushort y);\n" |
| 35986 | "short2 __ovld __cnfn rhadd(short2 x, short2 y);\n" |
| 35987 | "ushort2 __ovld __cnfn rhadd(ushort2 x, ushort2 y);\n" |
| 35988 | "short3 __ovld __cnfn rhadd(short3 x, short3 y);\n" |
| 35989 | "ushort3 __ovld __cnfn rhadd(ushort3 x, ushort3 y);\n" |
| 35990 | "short4 __ovld __cnfn rhadd(short4 x, short4 y);\n" |
| 35991 | "ushort4 __ovld __cnfn rhadd(ushort4 x, ushort4 y);\n" |
| 35992 | "short8 __ovld __cnfn rhadd(short8 x, short8 y);\n" |
| 35993 | "ushort8 __ovld __cnfn rhadd(ushort8 x, ushort8 y);\n" |
| 35994 | "short16 __ovld __cnfn rhadd(short16 x, short16 y);\n" |
| 35995 | "ushort16 __ovld __cnfn rhadd(ushort16 x, ushort16 y);\n" |
| 35996 | "int __ovld __cnfn rhadd(int x, int y);\n" |
| 35997 | "uint __ovld __cnfn rhadd(uint x, uint y);\n" |
| 35998 | "int2 __ovld __cnfn rhadd(int2 x, int2 y);\n" |
| 35999 | "uint2 __ovld __cnfn rhadd(uint2 x, uint2 y);\n" |
| 36000 | "int3 __ovld __cnfn rhadd(int3 x, int3 y);\n" |
| 36001 | "uint3 __ovld __cnfn rhadd(uint3 x, uint3 y);\n" |
| 36002 | "int4 __ovld __cnfn rhadd(int4 x, int4 y);\n" |
| 36003 | "uint4 __ovld __cnfn rhadd(uint4 x, uint4 y);\n" |
| 36004 | "int8 __ovld __cnfn rhadd(int8 x, int8 y);\n" |
| 36005 | "uint8 __ovld __cnfn rhadd(uint8 x, uint8 y);\n" |
| 36006 | "int16 __ovld __cnfn rhadd(int16 x, int16 y);\n" |
| 36007 | "uint16 __ovld __cnfn rhadd(uint16 x, uint16 y);\n" |
| 36008 | "long __ovld __cnfn rhadd(long x, long y);\n" |
| 36009 | "ulong __ovld __cnfn rhadd(ulong x, ulong y);\n" |
| 36010 | "long2 __ovld __cnfn rhadd(long2 x, long2 y);\n" |
| 36011 | "ulong2 __ovld __cnfn rhadd(ulong2 x, ulong2 y);\n" |
| 36012 | "long3 __ovld __cnfn rhadd(long3 x, long3 y);\n" |
| 36013 | "ulong3 __ovld __cnfn rhadd(ulong3 x, ulong3 y);\n" |
| 36014 | "long4 __ovld __cnfn rhadd(long4 x, long4 y);\n" |
| 36015 | "ulong4 __ovld __cnfn rhadd(ulong4 x, ulong4 y);\n" |
| 36016 | "long8 __ovld __cnfn rhadd(long8 x, long8 y);\n" |
| 36017 | "ulong8 __ovld __cnfn rhadd(ulong8 x, ulong8 y);\n" |
| 36018 | "long16 __ovld __cnfn rhadd(long16 x, long16 y);\n" |
| 36019 | "ulong16 __ovld __cnfn rhadd(ulong16 x, ulong16 y);\n" |
| 36020 | "\n" |
| 36021 | "/**\n" |
| 36022 | " * Returns min(max(x, minval), maxval).\n" |
| 36023 | " * Results are undefined if minval > maxval.\n" |
| 36024 | " */\n" |
| 36025 | "char __ovld __cnfn clamp(char x, char minval, char maxval);\n" |
| 36026 | "uchar __ovld __cnfn clamp(uchar x, uchar minval, uchar maxval);\n" |
| 36027 | "char2 __ovld __cnfn clamp(char2 x, char2 minval, char2 maxval);\n" |
| 36028 | "uchar2 __ovld __cnfn clamp(uchar2 x, uchar2 minval, uchar2 maxval);\n" |
| 36029 | "char3 __ovld __cnfn clamp(char3 x, char3 minval, char3 maxval);\n" |
| 36030 | "uchar3 __ovld __cnfn clamp(uchar3 x, uchar3 minval, uchar3 maxval);\n" |
| 36031 | "char4 __ovld __cnfn clamp(char4 x, char4 minval, char4 maxval);\n" |
| 36032 | "uchar4 __ovld __cnfn clamp(uchar4 x, uchar4 minval, uchar4 maxval);\n" |
| 36033 | "char8 __ovld __cnfn clamp(char8 x, char8 minval, char8 maxval);\n" |
| 36034 | "uchar8 __ovld __cnfn clamp(uchar8 x, uchar8 minval, uchar8 maxval);\n" |
| 36035 | "char16 __ovld __cnfn clamp(char16 x, char16 minval, char16 maxval);\n" |
| 36036 | "uchar16 __ovld __cnfn clamp(uchar16 x, uchar16 minval, uchar16 maxval);\n" |
| 36037 | "short __ovld __cnfn clamp(short x, short minval, short maxval);\n" |
| 36038 | "ushort __ovld __cnfn clamp(ushort x, ushort minval, ushort maxval);\n" |
| 36039 | "short2 __ovld __cnfn clamp(short2 x, short2 minval, short2 maxval);\n" |
| 36040 | "ushort2 __ovld __cnfn clamp(ushort2 x, ushort2 minval, ushort2 maxval);\n" |
| 36041 | "short3 __ovld __cnfn clamp(short3 x, short3 minval, short3 maxval);\n" |
| 36042 | "ushort3 __ovld __cnfn clamp(ushort3 x, ushort3 minval, ushort3 maxval);\n" |
| 36043 | "short4 __ovld __cnfn clamp(short4 x, short4 minval, short4 maxval);\n" |
| 36044 | "ushort4 __ovld __cnfn clamp(ushort4 x, ushort4 minval, ushort4 maxval);\n" |
| 36045 | "short8 __ovld __cnfn clamp(short8 x, short8 minval, short8 maxval);\n" |
| 36046 | "ushort8 __ovld __cnfn clamp(ushort8 x, ushort8 minval, ushort8 maxval);\n" |
| 36047 | "short16 __ovld __cnfn clamp(short16 x, short16 minval, short16 maxval);\n" |
| 36048 | "ushort16 __ovld __cnfn clamp(ushort16 x, ushort16 minval, ushort16 maxval);\n" |
| 36049 | "int __ovld __cnfn clamp(int x, int minval, int maxval);\n" |
| 36050 | "uint __ovld __cnfn clamp(uint x, uint minval, uint maxval);\n" |
| 36051 | "int2 __ovld __cnfn clamp(int2 x, int2 minval, int2 maxval);\n" |
| 36052 | "uint2 __ovld __cnfn clamp(uint2 x, uint2 minval, uint2 maxval);\n" |
| 36053 | "int3 __ovld __cnfn clamp(int3 x, int3 minval, int3 maxval);\n" |
| 36054 | "uint3 __ovld __cnfn clamp(uint3 x, uint3 minval, uint3 maxval);\n" |
| 36055 | "int4 __ovld __cnfn clamp(int4 x, int4 minval, int4 maxval);\n" |
| 36056 | "uint4 __ovld __cnfn clamp(uint4 x, uint4 minval, uint4 maxval);\n" |
| 36057 | "int8 __ovld __cnfn clamp(int8 x, int8 minval, int8 maxval);\n" |
| 36058 | "uint8 __ovld __cnfn clamp(uint8 x, uint8 minval, uint8 maxval);\n" |
| 36059 | "int16 __ovld __cnfn clamp(int16 x, int16 minval, int16 maxval);\n" |
| 36060 | "uint16 __ovld __cnfn clamp(uint16 x, uint16 minval, uint16 maxval);\n" |
| 36061 | "long __ovld __cnfn clamp(long x, long minval, long maxval);\n" |
| 36062 | "ulong __ovld __cnfn clamp(ulong x, ulong minval, ulong maxval);\n" |
| 36063 | "long2 __ovld __cnfn clamp(long2 x, long2 minval, long2 maxval);\n" |
| 36064 | "ulong2 __ovld __cnfn clamp(ulong2 x, ulong2 minval, ulong2 maxval);\n" |
| 36065 | "long3 __ovld __cnfn clamp(long3 x, long3 minval, long3 maxval);\n" |
| 36066 | "ulong3 __ovld __cnfn clamp(ulong3 x, ulong3 minval, ulong3 maxval);\n" |
| 36067 | "long4 __ovld __cnfn clamp(long4 x, long4 minval, long4 maxval);\n" |
| 36068 | "ulong4 __ovld __cnfn clamp(ulong4 x, ulong4 minval, ulong4 maxval);\n" |
| 36069 | "long8 __ovld __cnfn clamp(long8 x, long8 minval, long8 maxval);\n" |
| 36070 | "ulong8 __ovld __cnfn clamp(ulong8 x, ulong8 minval, ulong8 maxval);\n" |
| 36071 | "long16 __ovld __cnfn clamp(long16 x, long16 minval, long16 maxval);\n" |
| 36072 | "ulong16 __ovld __cnfn clamp(ulong16 x, ulong16 minval, ulong16 maxval);\n" |
| 36073 | "char __ovld __cnfn clamp(char x, char minval, char maxval);\n" |
| 36074 | "uchar __ovld __cnfn clamp(uchar x, uchar minval, uchar maxval);\n" |
| 36075 | "char2 __ovld __cnfn clamp(char2 x, char minval, char maxval);\n" |
| 36076 | "uchar2 __ovld __cnfn clamp(uchar2 x, uchar minval, uchar maxval);\n" |
| 36077 | "char3 __ovld __cnfn clamp(char3 x, char minval, char maxval);\n" |
| 36078 | "uchar3 __ovld __cnfn clamp(uchar3 x, uchar minval, uchar maxval);\n" |
| 36079 | "char4 __ovld __cnfn clamp(char4 x, char minval, char maxval);\n" |
| 36080 | "uchar4 __ovld __cnfn clamp(uchar4 x, uchar minval, uchar maxval);\n" |
| 36081 | "char8 __ovld __cnfn clamp(char8 x, char minval, char maxval);\n" |
| 36082 | "uchar8 __ovld __cnfn clamp(uchar8 x, uchar minval, uchar maxval);\n" |
| 36083 | "char16 __ovld __cnfn clamp(char16 x, char minval, char maxval);\n" |
| 36084 | "uchar16 __ovld __cnfn clamp(uchar16 x, uchar minval, uchar maxval);\n" |
| 36085 | "short __ovld __cnfn clamp(short x, short minval, short maxval);\n" |
| 36086 | "ushort __ovld __cnfn clamp(ushort x, ushort minval, ushort maxval);\n" |
| 36087 | "short2 __ovld __cnfn clamp(short2 x, short minval, short maxval);\n" |
| 36088 | "ushort2 __ovld __cnfn clamp(ushort2 x, ushort minval, ushort maxval);\n" |
| 36089 | "short3 __ovld __cnfn clamp(short3 x, short minval, short maxval);\n" |
| 36090 | "ushort3 __ovld __cnfn clamp(ushort3 x, ushort minval, ushort maxval);\n" |
| 36091 | "short4 __ovld __cnfn clamp(short4 x, short minval, short maxval);\n" |
| 36092 | "ushort4 __ovld __cnfn clamp(ushort4 x, ushort minval, ushort maxval);\n" |
| 36093 | "short8 __ovld __cnfn clamp(short8 x, short minval, short maxval);\n" |
| 36094 | "ushort8 __ovld __cnfn clamp(ushort8 x, ushort minval, ushort maxval);\n" |
| 36095 | "short16 __ovld __cnfn clamp(short16 x, short minval, short maxval);\n" |
| 36096 | "ushort16 __ovld __cnfn clamp(ushort16 x, ushort minval, ushort maxval);\n" |
| 36097 | "int __ovld __cnfn clamp(int x, int minval, int maxval);\n" |
| 36098 | "uint __ovld __cnfn clamp(uint x, uint minval, uint maxval);\n" |
| 36099 | "int2 __ovld __cnfn clamp(int2 x, int minval, int maxval);\n" |
| 36100 | "uint2 __ovld __cnfn clamp(uint2 x, uint minval, uint maxval);\n" |
| 36101 | "int3 __ovld __cnfn clamp(int3 x, int minval, int maxval);\n" |
| 36102 | "uint3 __ovld __cnfn clamp(uint3 x, uint minval, uint maxval);\n" |
| 36103 | "int4 __ovld __cnfn clamp(int4 x, int minval, int maxval);\n" |
| 36104 | "uint4 __ovld __cnfn clamp(uint4 x, uint minval, uint maxval);\n" |
| 36105 | "int8 __ovld __cnfn clamp(int8 x, int minval, int maxval);\n" |
| 36106 | "uint8 __ovld __cnfn clamp(uint8 x, uint minval, uint maxval);\n" |
| 36107 | "int16 __ovld __cnfn clamp(int16 x, int minval, int maxval);\n" |
| 36108 | "uint16 __ovld __cnfn clamp(uint16 x, uint minval, uint maxval);\n" |
| 36109 | "long __ovld __cnfn clamp(long x, long minval, long maxval);\n" |
| 36110 | "ulong __ovld __cnfn clamp(ulong x, ulong minval, ulong maxval);\n" |
| 36111 | "long2 __ovld __cnfn clamp(long2 x, long minval, long maxval);\n" |
| 36112 | "ulong2 __ovld __cnfn clamp(ulong2 x, ulong minval, ulong maxval);\n" |
| 36113 | "long3 __ovld __cnfn clamp(long3 x, long minval, long maxval);\n" |
| 36114 | "ulong3 __ovld __cnfn clamp(ulong3 x, ulong minval, ulong maxval);\n" |
| 36115 | "long4 __ovld __cnfn clamp(long4 x, long minval, long maxval);\n" |
| 36116 | "ulong4 __ovld __cnfn clamp(ulong4 x, ulong minval, ulong maxval);\n" |
| 36117 | "long8 __ovld __cnfn clamp(long8 x, long minval, long maxval);\n" |
| 36118 | "ulong8 __ovld __cnfn clamp(ulong8 x, ulong minval, ulong maxval);\n" |
| 36119 | "long16 __ovld __cnfn clamp(long16 x, long minval, long maxval);\n" |
| 36120 | "ulong16 __ovld __cnfn clamp(ulong16 x, ulong minval, ulong maxval);\n" |
| 36121 | "\n" |
| 36122 | "/**\n" |
| 36123 | " * Returns the number of leading 0-bits in x, starting\n" |
| 36124 | " * at the most significant bit position.\n" |
| 36125 | " */\n" |
| 36126 | "char __ovld __cnfn clz(char x);\n" |
| 36127 | "uchar __ovld __cnfn clz(uchar x);\n" |
| 36128 | "char2 __ovld __cnfn clz(char2 x);\n" |
| 36129 | "uchar2 __ovld __cnfn clz(uchar2 x);\n" |
| 36130 | "char3 __ovld __cnfn clz(char3 x);\n" |
| 36131 | "uchar3 __ovld __cnfn clz(uchar3 x);\n" |
| 36132 | "char4 __ovld __cnfn clz(char4 x);\n" |
| 36133 | "uchar4 __ovld __cnfn clz(uchar4 x);\n" |
| 36134 | "char8 __ovld __cnfn clz(char8 x);\n" |
| 36135 | "uchar8 __ovld __cnfn clz(uchar8 x);\n" |
| 36136 | "char16 __ovld __cnfn clz(char16 x);\n" |
| 36137 | "uchar16 __ovld __cnfn clz(uchar16 x);\n" |
| 36138 | "short __ovld __cnfn clz(short x);\n" |
| 36139 | "ushort __ovld __cnfn clz(ushort x);\n" |
| 36140 | "short2 __ovld __cnfn clz(short2 x);\n" |
| 36141 | "ushort2 __ovld __cnfn clz(ushort2 x);\n" |
| 36142 | "short3 __ovld __cnfn clz(short3 x);\n" |
| 36143 | "ushort3 __ovld __cnfn clz(ushort3 x);\n" |
| 36144 | "short4 __ovld __cnfn clz(short4 x);\n" |
| 36145 | "ushort4 __ovld __cnfn clz(ushort4 x);\n" |
| 36146 | "short8 __ovld __cnfn clz(short8 x);\n" |
| 36147 | "ushort8 __ovld __cnfn clz(ushort8 x);\n" |
| 36148 | "short16 __ovld __cnfn clz(short16 x);\n" |
| 36149 | "ushort16 __ovld __cnfn clz(ushort16 x);\n" |
| 36150 | "int __ovld __cnfn clz(int x);\n" |
| 36151 | "uint __ovld __cnfn clz(uint x);\n" |
| 36152 | "int2 __ovld __cnfn clz(int2 x);\n" |
| 36153 | "uint2 __ovld __cnfn clz(uint2 x);\n" |
| 36154 | "int3 __ovld __cnfn clz(int3 x);\n" |
| 36155 | "uint3 __ovld __cnfn clz(uint3 x);\n" |
| 36156 | "int4 __ovld __cnfn clz(int4 x);\n" |
| 36157 | "uint4 __ovld __cnfn clz(uint4 x);\n" |
| 36158 | "int8 __ovld __cnfn clz(int8 x);\n" |
| 36159 | "uint8 __ovld __cnfn clz(uint8 x);\n" |
| 36160 | "int16 __ovld __cnfn clz(int16 x);\n" |
| 36161 | "uint16 __ovld __cnfn clz(uint16 x);\n" |
| 36162 | "long __ovld __cnfn clz(long x);\n" |
| 36163 | "ulong __ovld __cnfn clz(ulong x);\n" |
| 36164 | "long2 __ovld __cnfn clz(long2 x);\n" |
| 36165 | "ulong2 __ovld __cnfn clz(ulong2 x);\n" |
| 36166 | "long3 __ovld __cnfn clz(long3 x);\n" |
| 36167 | "ulong3 __ovld __cnfn clz(ulong3 x);\n" |
| 36168 | "long4 __ovld __cnfn clz(long4 x);\n" |
| 36169 | "ulong4 __ovld __cnfn clz(ulong4 x);\n" |
| 36170 | "long8 __ovld __cnfn clz(long8 x);\n" |
| 36171 | "ulong8 __ovld __cnfn clz(ulong8 x);\n" |
| 36172 | "long16 __ovld __cnfn clz(long16 x);\n" |
| 36173 | "ulong16 __ovld __cnfn clz(ulong16 x);\n" |
| 36174 | "\n" |
| 36175 | "/**\n" |
| 36176 | " * Returns the count of trailing 0-bits in x. If x is 0,\n" |
| 36177 | " * returns the size in bits of the type of x or\n" |
| 36178 | " * component type of x, if x is a vector.\n" |
| 36179 | " */\n" |
| 36180 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 36181 | "char __ovld ctz(char x);\n" |
| 36182 | "uchar __ovld ctz(uchar x);\n" |
| 36183 | "char2 __ovld ctz(char2 x);\n" |
| 36184 | "uchar2 __ovld ctz(uchar2 x);\n" |
| 36185 | "char3 __ovld ctz(char3 x);\n" |
| 36186 | "uchar3 __ovld ctz(uchar3 x);\n" |
| 36187 | "char4 __ovld ctz(char4 x);\n" |
| 36188 | "uchar4 __ovld ctz(uchar4 x);\n" |
| 36189 | "char8 __ovld ctz(char8 x);\n" |
| 36190 | "uchar8 __ovld ctz(uchar8 x);\n" |
| 36191 | "char16 __ovld ctz(char16 x);\n" |
| 36192 | "uchar16 __ovld ctz(uchar16 x);\n" |
| 36193 | "short __ovld ctz(short x);\n" |
| 36194 | "ushort __ovld ctz(ushort x);\n" |
| 36195 | "short2 __ovld ctz(short2 x);\n" |
| 36196 | "ushort2 __ovld ctz(ushort2 x);\n" |
| 36197 | "short3 __ovld ctz(short3 x);\n" |
| 36198 | "ushort3 __ovld ctz(ushort3 x);\n" |
| 36199 | "short4 __ovld ctz(short4 x);\n" |
| 36200 | "ushort4 __ovld ctz(ushort4 x);\n" |
| 36201 | "short8 __ovld ctz(short8 x);\n" |
| 36202 | "ushort8 __ovld ctz(ushort8 x);\n" |
| 36203 | "short16 __ovld ctz(short16 x);\n" |
| 36204 | "ushort16 __ovld ctz(ushort16 x);\n" |
| 36205 | "int __ovld ctz(int x);\n" |
| 36206 | "uint __ovld ctz(uint x);\n" |
| 36207 | "int2 __ovld ctz(int2 x);\n" |
| 36208 | "uint2 __ovld ctz(uint2 x);\n" |
| 36209 | "int3 __ovld ctz(int3 x);\n" |
| 36210 | "uint3 __ovld ctz(uint3 x);\n" |
| 36211 | "int4 __ovld ctz(int4 x);\n" |
| 36212 | "uint4 __ovld ctz(uint4 x);\n" |
| 36213 | "int8 __ovld ctz(int8 x);\n" |
| 36214 | "uint8 __ovld ctz(uint8 x);\n" |
| 36215 | "int16 __ovld ctz(int16 x);\n" |
| 36216 | "uint16 __ovld ctz(uint16 x);\n" |
| 36217 | "long __ovld ctz(long x);\n" |
| 36218 | "ulong __ovld ctz(ulong x);\n" |
| 36219 | "long2 __ovld ctz(long2 x);\n" |
| 36220 | "ulong2 __ovld ctz(ulong2 x);\n" |
| 36221 | "long3 __ovld ctz(long3 x);\n" |
| 36222 | "ulong3 __ovld ctz(ulong3 x);\n" |
| 36223 | "long4 __ovld ctz(long4 x);\n" |
| 36224 | "ulong4 __ovld ctz(ulong4 x);\n" |
| 36225 | "long8 __ovld ctz(long8 x);\n" |
| 36226 | "ulong8 __ovld ctz(ulong8 x);\n" |
| 36227 | "long16 __ovld ctz(long16 x);\n" |
| 36228 | "ulong16 __ovld ctz(ulong16 x);\n" |
| 36229 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 36230 | "\n" |
| 36231 | "/**\n" |
| 36232 | " * Returns mul_hi(a, b) + c.\n" |
| 36233 | " */\n" |
| 36234 | "char __ovld __cnfn mad_hi(char a, char b, char c);\n" |
| 36235 | "uchar __ovld __cnfn mad_hi(uchar a, uchar b, uchar c);\n" |
| 36236 | "char2 __ovld __cnfn mad_hi(char2 a, char2 b, char2 c);\n" |
| 36237 | "uchar2 __ovld __cnfn mad_hi(uchar2 a, uchar2 b, uchar2 c);\n" |
| 36238 | "char3 __ovld __cnfn mad_hi(char3 a, char3 b, char3 c);\n" |
| 36239 | "uchar3 __ovld __cnfn mad_hi(uchar3 a, uchar3 b, uchar3 c);\n" |
| 36240 | "char4 __ovld __cnfn mad_hi(char4 a, char4 b, char4 c);\n" |
| 36241 | "uchar4 __ovld __cnfn mad_hi(uchar4 a, uchar4 b, uchar4 c);\n" |
| 36242 | "char8 __ovld __cnfn mad_hi(char8 a, char8 b, char8 c);\n" |
| 36243 | "uchar8 __ovld __cnfn mad_hi(uchar8 a, uchar8 b, uchar8 c);\n" |
| 36244 | "char16 __ovld __cnfn mad_hi(char16 a, char16 b, char16 c);\n" |
| 36245 | "uchar16 __ovld __cnfn mad_hi(uchar16 a, uchar16 b, uchar16 c);\n" |
| 36246 | "short __ovld __cnfn mad_hi(short a, short b, short c);\n" |
| 36247 | "ushort __ovld __cnfn mad_hi(ushort a, ushort b, ushort c);\n" |
| 36248 | "short2 __ovld __cnfn mad_hi(short2 a, short2 b, short2 c);\n" |
| 36249 | "ushort2 __ovld __cnfn mad_hi(ushort2 a, ushort2 b, ushort2 c);\n" |
| 36250 | "short3 __ovld __cnfn mad_hi(short3 a, short3 b, short3 c);\n" |
| 36251 | "ushort3 __ovld __cnfn mad_hi(ushort3 a, ushort3 b, ushort3 c);\n" |
| 36252 | "short4 __ovld __cnfn mad_hi(short4 a, short4 b, short4 c);\n" |
| 36253 | "ushort4 __ovld __cnfn mad_hi(ushort4 a, ushort4 b, ushort4 c);\n" |
| 36254 | "short8 __ovld __cnfn mad_hi(short8 a, short8 b, short8 c);\n" |
| 36255 | "ushort8 __ovld __cnfn mad_hi(ushort8 a, ushort8 b, ushort8 c);\n" |
| 36256 | "short16 __ovld __cnfn mad_hi(short16 a, short16 b, short16 c);\n" |
| 36257 | "ushort16 __ovld __cnfn mad_hi(ushort16 a, ushort16 b, ushort16 c);\n" |
| 36258 | "int __ovld __cnfn mad_hi(int a, int b, int c);\n" |
| 36259 | "uint __ovld __cnfn mad_hi(uint a, uint b, uint c);\n" |
| 36260 | "int2 __ovld __cnfn mad_hi(int2 a, int2 b, int2 c);\n" |
| 36261 | "uint2 __ovld __cnfn mad_hi(uint2 a, uint2 b, uint2 c);\n" |
| 36262 | "int3 __ovld __cnfn mad_hi(int3 a, int3 b, int3 c);\n" |
| 36263 | "uint3 __ovld __cnfn mad_hi(uint3 a, uint3 b, uint3 c);\n" |
| 36264 | "int4 __ovld __cnfn mad_hi(int4 a, int4 b, int4 c);\n" |
| 36265 | "uint4 __ovld __cnfn mad_hi(uint4 a, uint4 b, uint4 c);\n" |
| 36266 | "int8 __ovld __cnfn mad_hi(int8 a, int8 b, int8 c);\n" |
| 36267 | "uint8 __ovld __cnfn mad_hi(uint8 a, uint8 b, uint8 c);\n" |
| 36268 | "int16 __ovld __cnfn mad_hi(int16 a, int16 b, int16 c);\n" |
| 36269 | "uint16 __ovld __cnfn mad_hi(uint16 a, uint16 b, uint16 c);\n" |
| 36270 | "long __ovld __cnfn mad_hi(long a, long b, long c);\n" |
| 36271 | "ulong __ovld __cnfn mad_hi(ulong a, ulong b, ulong c);\n" |
| 36272 | "long2 __ovld __cnfn mad_hi(long2 a, long2 b, long2 c);\n" |
| 36273 | "ulong2 __ovld __cnfn mad_hi(ulong2 a, ulong2 b, ulong2 c);\n" |
| 36274 | "long3 __ovld __cnfn mad_hi(long3 a, long3 b, long3 c);\n" |
| 36275 | "ulong3 __ovld __cnfn mad_hi(ulong3 a, ulong3 b, ulong3 c);\n" |
| 36276 | "long4 __ovld __cnfn mad_hi(long4 a, long4 b, long4 c);\n" |
| 36277 | "ulong4 __ovld __cnfn mad_hi(ulong4 a, ulong4 b, ulong4 c);\n" |
| 36278 | "long8 __ovld __cnfn mad_hi(long8 a, long8 b, long8 c);\n" |
| 36279 | "ulong8 __ovld __cnfn mad_hi(ulong8 a, ulong8 b, ulong8 c);\n" |
| 36280 | "long16 __ovld __cnfn mad_hi(long16 a, long16 b, long16 c);\n" |
| 36281 | "ulong16 __ovld __cnfn mad_hi(ulong16 a, ulong16 b, ulong16 c);\n" |
| 36282 | "\n" |
| 36283 | "/**\n" |
| 36284 | " * Returns a * b + c and saturates the result.\n" |
| 36285 | " */\n" |
| 36286 | "char __ovld __cnfn mad_sat(char a, char b, char c);\n" |
| 36287 | "uchar __ovld __cnfn mad_sat(uchar a, uchar b, uchar c);\n" |
| 36288 | "char2 __ovld __cnfn mad_sat(char2 a, char2 b, char2 c);\n" |
| 36289 | "uchar2 __ovld __cnfn mad_sat(uchar2 a, uchar2 b, uchar2 c);\n" |
| 36290 | "char3 __ovld __cnfn mad_sat(char3 a, char3 b, char3 c);\n" |
| 36291 | "uchar3 __ovld __cnfn mad_sat(uchar3 a, uchar3 b, uchar3 c);\n" |
| 36292 | "char4 __ovld __cnfn mad_sat(char4 a, char4 b, char4 c);\n" |
| 36293 | "uchar4 __ovld __cnfn mad_sat(uchar4 a, uchar4 b, uchar4 c);\n" |
| 36294 | "char8 __ovld __cnfn mad_sat(char8 a, char8 b, char8 c);\n" |
| 36295 | "uchar8 __ovld __cnfn mad_sat(uchar8 a, uchar8 b, uchar8 c);\n" |
| 36296 | "char16 __ovld __cnfn mad_sat(char16 a, char16 b, char16 c);\n" |
| 36297 | "uchar16 __ovld __cnfn mad_sat(uchar16 a, uchar16 b, uchar16 c);\n" |
| 36298 | "short __ovld __cnfn mad_sat(short a, short b, short c);\n" |
| 36299 | "ushort __ovld __cnfn mad_sat(ushort a, ushort b, ushort c);\n" |
| 36300 | "short2 __ovld __cnfn mad_sat(short2 a, short2 b, short2 c);\n" |
| 36301 | "ushort2 __ovld __cnfn mad_sat(ushort2 a, ushort2 b, ushort2 c);\n" |
| 36302 | "short3 __ovld __cnfn mad_sat(short3 a, short3 b, short3 c);\n" |
| 36303 | "ushort3 __ovld __cnfn mad_sat(ushort3 a, ushort3 b, ushort3 c);\n" |
| 36304 | "short4 __ovld __cnfn mad_sat(short4 a, short4 b, short4 c);\n" |
| 36305 | "ushort4 __ovld __cnfn mad_sat(ushort4 a, ushort4 b, ushort4 c);\n" |
| 36306 | "short8 __ovld __cnfn mad_sat(short8 a, short8 b, short8 c);\n" |
| 36307 | "ushort8 __ovld __cnfn mad_sat(ushort8 a, ushort8 b, ushort8 c);\n" |
| 36308 | "short16 __ovld __cnfn mad_sat(short16 a, short16 b, short16 c);\n" |
| 36309 | "ushort16 __ovld __cnfn mad_sat(ushort16 a, ushort16 b, ushort16 c);\n" |
| 36310 | "int __ovld __cnfn mad_sat(int a, int b, int c);\n" |
| 36311 | "uint __ovld __cnfn mad_sat(uint a, uint b, uint c);\n" |
| 36312 | "int2 __ovld __cnfn mad_sat(int2 a, int2 b, int2 c);\n" |
| 36313 | "uint2 __ovld __cnfn mad_sat(uint2 a, uint2 b, uint2 c);\n" |
| 36314 | "int3 __ovld __cnfn mad_sat(int3 a, int3 b, int3 c);\n" |
| 36315 | "uint3 __ovld __cnfn mad_sat(uint3 a, uint3 b, uint3 c);\n" |
| 36316 | "int4 __ovld __cnfn mad_sat(int4 a, int4 b, int4 c);\n" |
| 36317 | "uint4 __ovld __cnfn mad_sat(uint4 a, uint4 b, uint4 c);\n" |
| 36318 | "int8 __ovld __cnfn mad_sat(int8 a, int8 b, int8 c);\n" |
| 36319 | "uint8 __ovld __cnfn mad_sat(uint8 a, uint8 b, uint8 c);\n" |
| 36320 | "int16 __ovld __cnfn mad_sat(int16 a, int16 b, int16 c);\n" |
| 36321 | "uint16 __ovld __cnfn mad_sat(uint16 a, uint16 b, uint16 c);\n" |
| 36322 | "long __ovld __cnfn mad_sat(long a, long b, long c);\n" |
| 36323 | "ulong __ovld __cnfn mad_sat(ulong a, ulong b, ulong c);\n" |
| 36324 | "long2 __ovld __cnfn mad_sat(long2 a, long2 b, long2 c);\n" |
| 36325 | "ulong2 __ovld __cnfn mad_sat(ulong2 a, ulong2 b, ulong2 c);\n" |
| 36326 | "long3 __ovld __cnfn mad_sat(long3 a, long3 b, long3 c);\n" |
| 36327 | "ulong3 __ovld __cnfn mad_sat(ulong3 a, ulong3 b, ulong3 c);\n" |
| 36328 | "long4 __ovld __cnfn mad_sat(long4 a, long4 b, long4 c);\n" |
| 36329 | "ulong4 __ovld __cnfn mad_sat(ulong4 a, ulong4 b, ulong4 c);\n" |
| 36330 | "long8 __ovld __cnfn mad_sat(long8 a, long8 b, long8 c);\n" |
| 36331 | "ulong8 __ovld __cnfn mad_sat(ulong8 a, ulong8 b, ulong8 c);\n" |
| 36332 | "long16 __ovld __cnfn mad_sat(long16 a, long16 b, long16 c);\n" |
| 36333 | "ulong16 __ovld __cnfn mad_sat(ulong16 a, ulong16 b, ulong16 c);\n" |
| 36334 | "\n" |
| 36335 | "/**\n" |
| 36336 | " * Returns y if x < y, otherwise it returns x.\n" |
| 36337 | " */\n" |
| 36338 | "char __ovld __cnfn max(char x, char y);\n" |
| 36339 | "uchar __ovld __cnfn max(uchar x, uchar y);\n" |
| 36340 | "char2 __ovld __cnfn max(char2 x, char2 y);\n" |
| 36341 | "uchar2 __ovld __cnfn max(uchar2 x, uchar2 y);\n" |
| 36342 | "char3 __ovld __cnfn max(char3 x, char3 y);\n" |
| 36343 | "uchar3 __ovld __cnfn max(uchar3 x, uchar3 y);\n" |
| 36344 | "char4 __ovld __cnfn max(char4 x, char4 y);\n" |
| 36345 | "uchar4 __ovld __cnfn max(uchar4 x, uchar4 y);\n" |
| 36346 | "char8 __ovld __cnfn max(char8 x, char8 y);\n" |
| 36347 | "uchar8 __ovld __cnfn max(uchar8 x, uchar8 y);\n" |
| 36348 | "char16 __ovld __cnfn max(char16 x, char16 y);\n" |
| 36349 | "uchar16 __ovld __cnfn max(uchar16 x, uchar16 y);\n" |
| 36350 | "short __ovld __cnfn max(short x, short y);\n" |
| 36351 | "ushort __ovld __cnfn max(ushort x, ushort y);\n" |
| 36352 | "short2 __ovld __cnfn max(short2 x, short2 y);\n" |
| 36353 | "ushort2 __ovld __cnfn max(ushort2 x, ushort2 y);\n" |
| 36354 | "short3 __ovld __cnfn max(short3 x, short3 y);\n" |
| 36355 | "ushort3 __ovld __cnfn max(ushort3 x, ushort3 y);\n" |
| 36356 | "short4 __ovld __cnfn max(short4 x, short4 y);\n" |
| 36357 | "ushort4 __ovld __cnfn max(ushort4 x, ushort4 y);\n" |
| 36358 | "short8 __ovld __cnfn max(short8 x, short8 y);\n" |
| 36359 | "ushort8 __ovld __cnfn max(ushort8 x, ushort8 y);\n" |
| 36360 | "short16 __ovld __cnfn max(short16 x, short16 y);\n" |
| 36361 | "ushort16 __ovld __cnfn max(ushort16 x, ushort16 y);\n" |
| 36362 | "int __ovld __cnfn max(int x, int y);\n" |
| 36363 | "uint __ovld __cnfn max(uint x, uint y);\n" |
| 36364 | "int2 __ovld __cnfn max(int2 x, int2 y);\n" |
| 36365 | "uint2 __ovld __cnfn max(uint2 x, uint2 y);\n" |
| 36366 | "int3 __ovld __cnfn max(int3 x, int3 y);\n" |
| 36367 | "uint3 __ovld __cnfn max(uint3 x, uint3 y);\n" |
| 36368 | "int4 __ovld __cnfn max(int4 x, int4 y);\n" |
| 36369 | "uint4 __ovld __cnfn max(uint4 x, uint4 y);\n" |
| 36370 | "int8 __ovld __cnfn max(int8 x, int8 y);\n" |
| 36371 | "uint8 __ovld __cnfn max(uint8 x, uint8 y);\n" |
| 36372 | "int16 __ovld __cnfn max(int16 x, int16 y);\n" |
| 36373 | "uint16 __ovld __cnfn max(uint16 x, uint16 y);\n" |
| 36374 | "long __ovld __cnfn max(long x, long y);\n" |
| 36375 | "ulong __ovld __cnfn max(ulong x, ulong y);\n" |
| 36376 | "long2 __ovld __cnfn max(long2 x, long2 y);\n" |
| 36377 | "ulong2 __ovld __cnfn max(ulong2 x, ulong2 y);\n" |
| 36378 | "long3 __ovld __cnfn max(long3 x, long3 y);\n" |
| 36379 | "ulong3 __ovld __cnfn max(ulong3 x, ulong3 y);\n" |
| 36380 | "long4 __ovld __cnfn max(long4 x, long4 y);\n" |
| 36381 | "ulong4 __ovld __cnfn max(ulong4 x, ulong4 y);\n" |
| 36382 | "long8 __ovld __cnfn max(long8 x, long8 y);\n" |
| 36383 | "ulong8 __ovld __cnfn max(ulong8 x, ulong8 y);\n" |
| 36384 | "long16 __ovld __cnfn max(long16 x, long16 y);\n" |
| 36385 | "ulong16 __ovld __cnfn max(ulong16 x, ulong16 y);\n" |
| 36386 | "char __ovld __cnfn max(char x, char y);\n" |
| 36387 | "uchar __ovld __cnfn max(uchar x, uchar y);\n" |
| 36388 | "char2 __ovld __cnfn max(char2 x, char y);\n" |
| 36389 | "uchar2 __ovld __cnfn max(uchar2 x, uchar y);\n" |
| 36390 | "char3 __ovld __cnfn max(char3 x, char y);\n" |
| 36391 | "uchar3 __ovld __cnfn max(uchar3 x, uchar y);\n" |
| 36392 | "char4 __ovld __cnfn max(char4 x, char y);\n" |
| 36393 | "uchar4 __ovld __cnfn max(uchar4 x, uchar y);\n" |
| 36394 | "char8 __ovld __cnfn max(char8 x, char y);\n" |
| 36395 | "uchar8 __ovld __cnfn max(uchar8 x, uchar y);\n" |
| 36396 | "char16 __ovld __cnfn max(char16 x, char y);\n" |
| 36397 | "uchar16 __ovld __cnfn max(uchar16 x, uchar y);\n" |
| 36398 | "short __ovld __cnfn max(short x, short y);\n" |
| 36399 | "ushort __ovld __cnfn max(ushort x, ushort y);\n" |
| 36400 | "short2 __ovld __cnfn max(short2 x, short y);\n" |
| 36401 | "ushort2 __ovld __cnfn max(ushort2 x, ushort y);\n" |
| 36402 | "short3 __ovld __cnfn max(short3 x, short y);\n" |
| 36403 | "ushort3 __ovld __cnfn max(ushort3 x, ushort y);\n" |
| 36404 | "short4 __ovld __cnfn max(short4 x, short y);\n" |
| 36405 | "ushort4 __ovld __cnfn max(ushort4 x, ushort y);\n" |
| 36406 | "short8 __ovld __cnfn max(short8 x, short y);\n" |
| 36407 | "ushort8 __ovld __cnfn max(ushort8 x, ushort y);\n" |
| 36408 | "short16 __ovld __cnfn max(short16 x, short y);\n" |
| 36409 | "ushort16 __ovld __cnfn max(ushort16 x, ushort y);\n" |
| 36410 | "int __ovld __cnfn max(int x, int y);\n" |
| 36411 | "uint __ovld __cnfn max(uint x, uint y);\n" |
| 36412 | "int2 __ovld __cnfn max(int2 x, int y);\n" |
| 36413 | "uint2 __ovld __cnfn max(uint2 x, uint y);\n" |
| 36414 | "int3 __ovld __cnfn max(int3 x, int y);\n" |
| 36415 | "uint3 __ovld __cnfn max(uint3 x, uint y);\n" |
| 36416 | "int4 __ovld __cnfn max(int4 x, int y);\n" |
| 36417 | "uint4 __ovld __cnfn max(uint4 x, uint y);\n" |
| 36418 | "int8 __ovld __cnfn max(int8 x, int y);\n" |
| 36419 | "uint8 __ovld __cnfn max(uint8 x, uint y);\n" |
| 36420 | "int16 __ovld __cnfn max(int16 x, int y);\n" |
| 36421 | "uint16 __ovld __cnfn max(uint16 x, uint y);\n" |
| 36422 | "long __ovld __cnfn max(long x, long y);\n" |
| 36423 | "ulong __ovld __cnfn max(ulong x, ulong y);\n" |
| 36424 | "long2 __ovld __cnfn max(long2 x, long y);\n" |
| 36425 | "ulong2 __ovld __cnfn max(ulong2 x, ulong y);\n" |
| 36426 | "long3 __ovld __cnfn max(long3 x, long y);\n" |
| 36427 | "ulong3 __ovld __cnfn max(ulong3 x, ulong y);\n" |
| 36428 | "long4 __ovld __cnfn max(long4 x, long y);\n" |
| 36429 | "ulong4 __ovld __cnfn max(ulong4 x, ulong y);\n" |
| 36430 | "long8 __ovld __cnfn max(long8 x, long y);\n" |
| 36431 | "ulong8 __ovld __cnfn max(ulong8 x, ulong y);\n" |
| 36432 | "long16 __ovld __cnfn max(long16 x, long y);\n" |
| 36433 | "ulong16 __ovld __cnfn max(ulong16 x, ulong y);\n" |
| 36434 | "\n" |
| 36435 | "/**\n" |
| 36436 | " * Returns y if y < x, otherwise it returns x.\n" |
| 36437 | " */\n" |
| 36438 | "char __ovld __cnfn min(char x, char y);\n" |
| 36439 | "uchar __ovld __cnfn min(uchar x, uchar y);\n" |
| 36440 | "char2 __ovld __cnfn min(char2 x, char2 y);\n" |
| 36441 | "uchar2 __ovld __cnfn min(uchar2 x, uchar2 y);\n" |
| 36442 | "char3 __ovld __cnfn min(char3 x, char3 y);\n" |
| 36443 | "uchar3 __ovld __cnfn min(uchar3 x, uchar3 y);\n" |
| 36444 | "char4 __ovld __cnfn min(char4 x, char4 y);\n" |
| 36445 | "uchar4 __ovld __cnfn min(uchar4 x, uchar4 y);\n" |
| 36446 | "char8 __ovld __cnfn min(char8 x, char8 y);\n" |
| 36447 | "uchar8 __ovld __cnfn min(uchar8 x, uchar8 y);\n" |
| 36448 | "char16 __ovld __cnfn min(char16 x, char16 y);\n" |
| 36449 | "uchar16 __ovld __cnfn min(uchar16 x, uchar16 y);\n" |
| 36450 | "short __ovld __cnfn min(short x, short y);\n" |
| 36451 | "ushort __ovld __cnfn min(ushort x, ushort y);\n" |
| 36452 | "short2 __ovld __cnfn min(short2 x, short2 y);\n" |
| 36453 | "ushort2 __ovld __cnfn min(ushort2 x, ushort2 y);\n" |
| 36454 | "short3 __ovld __cnfn min(short3 x, short3 y);\n" |
| 36455 | "ushort3 __ovld __cnfn min(ushort3 x, ushort3 y);\n" |
| 36456 | "short4 __ovld __cnfn min(short4 x, short4 y);\n" |
| 36457 | "ushort4 __ovld __cnfn min(ushort4 x, ushort4 y);\n" |
| 36458 | "short8 __ovld __cnfn min(short8 x, short8 y);\n" |
| 36459 | "ushort8 __ovld __cnfn min(ushort8 x, ushort8 y);\n" |
| 36460 | "short16 __ovld __cnfn min(short16 x, short16 y);\n" |
| 36461 | "ushort16 __ovld __cnfn min(ushort16 x, ushort16 y);\n" |
| 36462 | "int __ovld __cnfn min(int x, int y);\n" |
| 36463 | "uint __ovld __cnfn min(uint x, uint y);\n" |
| 36464 | "int2 __ovld __cnfn min(int2 x, int2 y);\n" |
| 36465 | "uint2 __ovld __cnfn min(uint2 x, uint2 y);\n" |
| 36466 | "int3 __ovld __cnfn min(int3 x, int3 y);\n" |
| 36467 | "uint3 __ovld __cnfn min(uint3 x, uint3 y);\n" |
| 36468 | "int4 __ovld __cnfn min(int4 x, int4 y);\n" |
| 36469 | "uint4 __ovld __cnfn min(uint4 x, uint4 y);\n" |
| 36470 | "int8 __ovld __cnfn min(int8 x, int8 y);\n" |
| 36471 | "uint8 __ovld __cnfn min(uint8 x, uint8 y);\n" |
| 36472 | "int16 __ovld __cnfn min(int16 x, int16 y);\n" |
| 36473 | "uint16 __ovld __cnfn min(uint16 x, uint16 y);\n" |
| 36474 | "long __ovld __cnfn min(long x, long y);\n" |
| 36475 | "ulong __ovld __cnfn min(ulong x, ulong y);\n" |
| 36476 | "long2 __ovld __cnfn min(long2 x, long2 y);\n" |
| 36477 | "ulong2 __ovld __cnfn min(ulong2 x, ulong2 y);\n" |
| 36478 | "long3 __ovld __cnfn min(long3 x, long3 y);\n" |
| 36479 | "ulong3 __ovld __cnfn min(ulong3 x, ulong3 y);\n" |
| 36480 | "long4 __ovld __cnfn min(long4 x, long4 y);\n" |
| 36481 | "ulong4 __ovld __cnfn min(ulong4 x, ulong4 y);\n" |
| 36482 | "long8 __ovld __cnfn min(long8 x, long8 y);\n" |
| 36483 | "ulong8 __ovld __cnfn min(ulong8 x, ulong8 y);\n" |
| 36484 | "long16 __ovld __cnfn min(long16 x, long16 y);\n" |
| 36485 | "ulong16 __ovld __cnfn min(ulong16 x, ulong16 y);\n" |
| 36486 | "char __ovld __cnfn min(char x, char y);\n" |
| 36487 | "uchar __ovld __cnfn min(uchar x, uchar y);\n" |
| 36488 | "char2 __ovld __cnfn min(char2 x, char y);\n" |
| 36489 | "uchar2 __ovld __cnfn min(uchar2 x, uchar y);\n" |
| 36490 | "char3 __ovld __cnfn min(char3 x, char y);\n" |
| 36491 | "uchar3 __ovld __cnfn min(uchar3 x, uchar y);\n" |
| 36492 | "char4 __ovld __cnfn min(char4 x, char y);\n" |
| 36493 | "uchar4 __ovld __cnfn min(uchar4 x, uchar y);\n" |
| 36494 | "char8 __ovld __cnfn min(char8 x, char y);\n" |
| 36495 | "uchar8 __ovld __cnfn min(uchar8 x, uchar y);\n" |
| 36496 | "char16 __ovld __cnfn min(char16 x, char y);\n" |
| 36497 | "uchar16 __ovld __cnfn min(uchar16 x, uchar y);\n" |
| 36498 | "short __ovld __cnfn min(short x, short y);\n" |
| 36499 | "ushort __ovld __cnfn min(ushort x, ushort y);\n" |
| 36500 | "short2 __ovld __cnfn min(short2 x, short y);\n" |
| 36501 | "ushort2 __ovld __cnfn min(ushort2 x, ushort y);\n" |
| 36502 | "short3 __ovld __cnfn min(short3 x, short y);\n" |
| 36503 | "ushort3 __ovld __cnfn min(ushort3 x, ushort y);\n" |
| 36504 | "short4 __ovld __cnfn min(short4 x, short y);\n" |
| 36505 | "ushort4 __ovld __cnfn min(ushort4 x, ushort y);\n" |
| 36506 | "short8 __ovld __cnfn min(short8 x, short y);\n" |
| 36507 | "ushort8 __ovld __cnfn min(ushort8 x, ushort y);\n" |
| 36508 | "short16 __ovld __cnfn min(short16 x, short y);\n" |
| 36509 | "ushort16 __ovld __cnfn min(ushort16 x, ushort y);\n" |
| 36510 | "int __ovld __cnfn min(int x, int y);\n" |
| 36511 | "uint __ovld __cnfn min(uint x, uint y);\n" |
| 36512 | "int2 __ovld __cnfn min(int2 x, int y);\n" |
| 36513 | "uint2 __ovld __cnfn min(uint2 x, uint y);\n" |
| 36514 | "int3 __ovld __cnfn min(int3 x, int y);\n" |
| 36515 | "uint3 __ovld __cnfn min(uint3 x, uint y);\n" |
| 36516 | "int4 __ovld __cnfn min(int4 x, int y);\n" |
| 36517 | "uint4 __ovld __cnfn min(uint4 x, uint y);\n" |
| 36518 | "int8 __ovld __cnfn min(int8 x, int y);\n" |
| 36519 | "uint8 __ovld __cnfn min(uint8 x, uint y);\n" |
| 36520 | "int16 __ovld __cnfn min(int16 x, int y);\n" |
| 36521 | "uint16 __ovld __cnfn min(uint16 x, uint y);\n" |
| 36522 | "long __ovld __cnfn min(long x, long y);\n" |
| 36523 | "ulong __ovld __cnfn min(ulong x, ulong y);\n" |
| 36524 | "long2 __ovld __cnfn min(long2 x, long y);\n" |
| 36525 | "ulong2 __ovld __cnfn min(ulong2 x, ulong y);\n" |
| 36526 | "long3 __ovld __cnfn min(long3 x, long y);\n" |
| 36527 | "ulong3 __ovld __cnfn min(ulong3 x, ulong y);\n" |
| 36528 | "long4 __ovld __cnfn min(long4 x, long y);\n" |
| 36529 | "ulong4 __ovld __cnfn min(ulong4 x, ulong y);\n" |
| 36530 | "long8 __ovld __cnfn min(long8 x, long y);\n" |
| 36531 | "ulong8 __ovld __cnfn min(ulong8 x, ulong y);\n" |
| 36532 | "long16 __ovld __cnfn min(long16 x, long y);\n" |
| 36533 | "ulong16 __ovld __cnfn min(ulong16 x, ulong y);\n" |
| 36534 | "\n" |
| 36535 | "/**\n" |
| 36536 | " * Computes x * y and returns the high half of the\n" |
| 36537 | " * product of x and y.\n" |
| 36538 | " */\n" |
| 36539 | "char __ovld __cnfn mul_hi(char x, char y);\n" |
| 36540 | "uchar __ovld __cnfn mul_hi(uchar x, uchar y);\n" |
| 36541 | "char2 __ovld __cnfn mul_hi(char2 x, char2 y);\n" |
| 36542 | "uchar2 __ovld __cnfn mul_hi(uchar2 x, uchar2 y);\n" |
| 36543 | "char3 __ovld __cnfn mul_hi(char3 x, char3 y);\n" |
| 36544 | "uchar3 __ovld __cnfn mul_hi(uchar3 x, uchar3 y);\n" |
| 36545 | "char4 __ovld __cnfn mul_hi(char4 x, char4 y);\n" |
| 36546 | "uchar4 __ovld __cnfn mul_hi(uchar4 x, uchar4 y);\n" |
| 36547 | "char8 __ovld __cnfn mul_hi(char8 x, char8 y);\n" |
| 36548 | "uchar8 __ovld __cnfn mul_hi(uchar8 x, uchar8 y);\n" |
| 36549 | "char16 __ovld __cnfn mul_hi(char16 x, char16 y);\n" |
| 36550 | "uchar16 __ovld __cnfn mul_hi(uchar16 x, uchar16 y);\n" |
| 36551 | "short __ovld __cnfn mul_hi(short x, short y);\n" |
| 36552 | "ushort __ovld __cnfn mul_hi(ushort x, ushort y);\n" |
| 36553 | "short2 __ovld __cnfn mul_hi(short2 x, short2 y);\n" |
| 36554 | "ushort2 __ovld __cnfn mul_hi(ushort2 x, ushort2 y);\n" |
| 36555 | "short3 __ovld __cnfn mul_hi(short3 x, short3 y);\n" |
| 36556 | "ushort3 __ovld __cnfn mul_hi(ushort3 x, ushort3 y);\n" |
| 36557 | "short4 __ovld __cnfn mul_hi(short4 x, short4 y);\n" |
| 36558 | "ushort4 __ovld __cnfn mul_hi(ushort4 x, ushort4 y);\n" |
| 36559 | "short8 __ovld __cnfn mul_hi(short8 x, short8 y);\n" |
| 36560 | "ushort8 __ovld __cnfn mul_hi(ushort8 x, ushort8 y);\n" |
| 36561 | "short16 __ovld __cnfn mul_hi(short16 x, short16 y);\n" |
| 36562 | "ushort16 __ovld __cnfn mul_hi(ushort16 x, ushort16 y);\n" |
| 36563 | "int __ovld __cnfn mul_hi(int x, int y);\n" |
| 36564 | "uint __ovld __cnfn mul_hi(uint x, uint y);\n" |
| 36565 | "int2 __ovld __cnfn mul_hi(int2 x, int2 y);\n" |
| 36566 | "uint2 __ovld __cnfn mul_hi(uint2 x, uint2 y);\n" |
| 36567 | "int3 __ovld __cnfn mul_hi(int3 x, int3 y);\n" |
| 36568 | "uint3 __ovld __cnfn mul_hi(uint3 x, uint3 y);\n" |
| 36569 | "int4 __ovld __cnfn mul_hi(int4 x, int4 y);\n" |
| 36570 | "uint4 __ovld __cnfn mul_hi(uint4 x, uint4 y);\n" |
| 36571 | "int8 __ovld __cnfn mul_hi(int8 x, int8 y);\n" |
| 36572 | "uint8 __ovld __cnfn mul_hi(uint8 x, uint8 y);\n" |
| 36573 | "int16 __ovld __cnfn mul_hi(int16 x, int16 y);\n" |
| 36574 | "uint16 __ovld __cnfn mul_hi(uint16 x, uint16 y);\n" |
| 36575 | "long __ovld __cnfn mul_hi(long x, long y);\n" |
| 36576 | "ulong __ovld __cnfn mul_hi(ulong x, ulong y);\n" |
| 36577 | "long2 __ovld __cnfn mul_hi(long2 x, long2 y);\n" |
| 36578 | "ulong2 __ovld __cnfn mul_hi(ulong2 x, ulong2 y);\n" |
| 36579 | "long3 __ovld __cnfn mul_hi(long3 x, long3 y);\n" |
| 36580 | "ulong3 __ovld __cnfn mul_hi(ulong3 x, ulong3 y);\n" |
| 36581 | "long4 __ovld __cnfn mul_hi(long4 x, long4 y);\n" |
| 36582 | "ulong4 __ovld __cnfn mul_hi(ulong4 x, ulong4 y);\n" |
| 36583 | "long8 __ovld __cnfn mul_hi(long8 x, long8 y);\n" |
| 36584 | "ulong8 __ovld __cnfn mul_hi(ulong8 x, ulong8 y);\n" |
| 36585 | "long16 __ovld __cnfn mul_hi(long16 x, long16 y);\n" |
| 36586 | "ulong16 __ovld __cnfn mul_hi(ulong16 x, ulong16 y);\n" |
| 36587 | "\n" |
| 36588 | "/**\n" |
| 36589 | " * For each element in v, the bits are shifted left by\n" |
| 36590 | " * the number of bits given by the corresponding\n" |
| 36591 | " * element in i (subject to usual shift modulo rules\n" |
| 36592 | " * described in section 6.3). Bits shifted off the left\n" |
| 36593 | " * side of the element are shifted back in from the\n" |
| 36594 | " * right.\n" |
| 36595 | " */\n" |
| 36596 | "char __ovld __cnfn rotate(char v, char i);\n" |
| 36597 | "uchar __ovld __cnfn rotate(uchar v, uchar i);\n" |
| 36598 | "char2 __ovld __cnfn rotate(char2 v, char2 i);\n" |
| 36599 | "uchar2 __ovld __cnfn rotate(uchar2 v, uchar2 i);\n" |
| 36600 | "char3 __ovld __cnfn rotate(char3 v, char3 i);\n" |
| 36601 | "uchar3 __ovld __cnfn rotate(uchar3 v, uchar3 i);\n" |
| 36602 | "char4 __ovld __cnfn rotate(char4 v, char4 i);\n" |
| 36603 | "uchar4 __ovld __cnfn rotate(uchar4 v, uchar4 i);\n" |
| 36604 | "char8 __ovld __cnfn rotate(char8 v, char8 i);\n" |
| 36605 | "uchar8 __ovld __cnfn rotate(uchar8 v, uchar8 i);\n" |
| 36606 | "char16 __ovld __cnfn rotate(char16 v, char16 i);\n" |
| 36607 | "uchar16 __ovld __cnfn rotate(uchar16 v, uchar16 i);\n" |
| 36608 | "short __ovld __cnfn rotate(short v, short i);\n" |
| 36609 | "ushort __ovld __cnfn rotate(ushort v, ushort i);\n" |
| 36610 | "short2 __ovld __cnfn rotate(short2 v, short2 i);\n" |
| 36611 | "ushort2 __ovld __cnfn rotate(ushort2 v, ushort2 i);\n" |
| 36612 | "short3 __ovld __cnfn rotate(short3 v, short3 i);\n" |
| 36613 | "ushort3 __ovld __cnfn rotate(ushort3 v, ushort3 i);\n" |
| 36614 | "short4 __ovld __cnfn rotate(short4 v, short4 i);\n" |
| 36615 | "ushort4 __ovld __cnfn rotate(ushort4 v, ushort4 i);\n" |
| 36616 | "short8 __ovld __cnfn rotate(short8 v, short8 i);\n" |
| 36617 | "ushort8 __ovld __cnfn rotate(ushort8 v, ushort8 i);\n" |
| 36618 | "short16 __ovld __cnfn rotate(short16 v, short16 i);\n" |
| 36619 | "ushort16 __ovld __cnfn rotate(ushort16 v, ushort16 i);\n" |
| 36620 | "int __ovld __cnfn rotate(int v, int i);\n" |
| 36621 | "uint __ovld __cnfn rotate(uint v, uint i);\n" |
| 36622 | "int2 __ovld __cnfn rotate(int2 v, int2 i);\n" |
| 36623 | "uint2 __ovld __cnfn rotate(uint2 v, uint2 i);\n" |
| 36624 | "int3 __ovld __cnfn rotate(int3 v, int3 i);\n" |
| 36625 | "uint3 __ovld __cnfn rotate(uint3 v, uint3 i);\n" |
| 36626 | "int4 __ovld __cnfn rotate(int4 v, int4 i);\n" |
| 36627 | "uint4 __ovld __cnfn rotate(uint4 v, uint4 i);\n" |
| 36628 | "int8 __ovld __cnfn rotate(int8 v, int8 i);\n" |
| 36629 | "uint8 __ovld __cnfn rotate(uint8 v, uint8 i);\n" |
| 36630 | "int16 __ovld __cnfn rotate(int16 v, int16 i);\n" |
| 36631 | "uint16 __ovld __cnfn rotate(uint16 v, uint16 i);\n" |
| 36632 | "long __ovld __cnfn rotate(long v, long i);\n" |
| 36633 | "ulong __ovld __cnfn rotate(ulong v, ulong i);\n" |
| 36634 | "long2 __ovld __cnfn rotate(long2 v, long2 i);\n" |
| 36635 | "ulong2 __ovld __cnfn rotate(ulong2 v, ulong2 i);\n" |
| 36636 | "long3 __ovld __cnfn rotate(long3 v, long3 i);\n" |
| 36637 | "ulong3 __ovld __cnfn rotate(ulong3 v, ulong3 i);\n" |
| 36638 | "long4 __ovld __cnfn rotate(long4 v, long4 i);\n" |
| 36639 | "ulong4 __ovld __cnfn rotate(ulong4 v, ulong4 i);\n" |
| 36640 | "long8 __ovld __cnfn rotate(long8 v, long8 i);\n" |
| 36641 | "ulong8 __ovld __cnfn rotate(ulong8 v, ulong8 i);\n" |
| 36642 | "long16 __ovld __cnfn rotate(long16 v, long16 i);\n" |
| 36643 | "ulong16 __ovld __cnfn rotate(ulong16 v, ulong16 i);\n" |
| 36644 | "\n" |
| 36645 | "/**\n" |
| 36646 | " * Returns x - y and saturates the result.\n" |
| 36647 | " */\n" |
| 36648 | "char __ovld __cnfn sub_sat(char x, char y);\n" |
| 36649 | "uchar __ovld __cnfn sub_sat(uchar x, uchar y);\n" |
| 36650 | "char2 __ovld __cnfn sub_sat(char2 x, char2 y);\n" |
| 36651 | "uchar2 __ovld __cnfn sub_sat(uchar2 x, uchar2 y);\n" |
| 36652 | "char3 __ovld __cnfn sub_sat(char3 x, char3 y);\n" |
| 36653 | "uchar3 __ovld __cnfn sub_sat(uchar3 x, uchar3 y);\n" |
| 36654 | "char4 __ovld __cnfn sub_sat(char4 x, char4 y);\n" |
| 36655 | "uchar4 __ovld __cnfn sub_sat(uchar4 x, uchar4 y);\n" |
| 36656 | "char8 __ovld __cnfn sub_sat(char8 x, char8 y);\n" |
| 36657 | "uchar8 __ovld __cnfn sub_sat(uchar8 x, uchar8 y);\n" |
| 36658 | "char16 __ovld __cnfn sub_sat(char16 x, char16 y);\n" |
| 36659 | "uchar16 __ovld __cnfn sub_sat(uchar16 x, uchar16 y);\n" |
| 36660 | "short __ovld __cnfn sub_sat(short x, short y);\n" |
| 36661 | "ushort __ovld __cnfn sub_sat(ushort x, ushort y);\n" |
| 36662 | "short2 __ovld __cnfn sub_sat(short2 x, short2 y);\n" |
| 36663 | "ushort2 __ovld __cnfn sub_sat(ushort2 x, ushort2 y);\n" |
| 36664 | "short3 __ovld __cnfn sub_sat(short3 x, short3 y);\n" |
| 36665 | "ushort3 __ovld __cnfn sub_sat(ushort3 x, ushort3 y);\n" |
| 36666 | "short4 __ovld __cnfn sub_sat(short4 x, short4 y);\n" |
| 36667 | "ushort4 __ovld __cnfn sub_sat(ushort4 x, ushort4 y);\n" |
| 36668 | "short8 __ovld __cnfn sub_sat(short8 x, short8 y);\n" |
| 36669 | "ushort8 __ovld __cnfn sub_sat(ushort8 x, ushort8 y);\n" |
| 36670 | "short16 __ovld __cnfn sub_sat(short16 x, short16 y);\n" |
| 36671 | "ushort16 __ovld __cnfn sub_sat(ushort16 x, ushort16 y);\n" |
| 36672 | "int __ovld __cnfn sub_sat(int x, int y);\n" |
| 36673 | "uint __ovld __cnfn sub_sat(uint x, uint y);\n" |
| 36674 | "int2 __ovld __cnfn sub_sat(int2 x, int2 y);\n" |
| 36675 | "uint2 __ovld __cnfn sub_sat(uint2 x, uint2 y);\n" |
| 36676 | "int3 __ovld __cnfn sub_sat(int3 x, int3 y);\n" |
| 36677 | "uint3 __ovld __cnfn sub_sat(uint3 x, uint3 y);\n" |
| 36678 | "int4 __ovld __cnfn sub_sat(int4 x, int4 y);\n" |
| 36679 | "uint4 __ovld __cnfn sub_sat(uint4 x, uint4 y);\n" |
| 36680 | "int8 __ovld __cnfn sub_sat(int8 x, int8 y);\n" |
| 36681 | "uint8 __ovld __cnfn sub_sat(uint8 x, uint8 y);\n" |
| 36682 | "int16 __ovld __cnfn sub_sat(int16 x, int16 y);\n" |
| 36683 | "uint16 __ovld __cnfn sub_sat(uint16 x, uint16 y);\n" |
| 36684 | "long __ovld __cnfn sub_sat(long x, long y);\n" |
| 36685 | "ulong __ovld __cnfn sub_sat(ulong x, ulong y);\n" |
| 36686 | "long2 __ovld __cnfn sub_sat(long2 x, long2 y);\n" |
| 36687 | "ulong2 __ovld __cnfn sub_sat(ulong2 x, ulong2 y);\n" |
| 36688 | "long3 __ovld __cnfn sub_sat(long3 x, long3 y);\n" |
| 36689 | "ulong3 __ovld __cnfn sub_sat(ulong3 x, ulong3 y);\n" |
| 36690 | "long4 __ovld __cnfn sub_sat(long4 x, long4 y);\n" |
| 36691 | "ulong4 __ovld __cnfn sub_sat(ulong4 x, ulong4 y);\n" |
| 36692 | "long8 __ovld __cnfn sub_sat(long8 x, long8 y);\n" |
| 36693 | "ulong8 __ovld __cnfn sub_sat(ulong8 x, ulong8 y);\n" |
| 36694 | "long16 __ovld __cnfn sub_sat(long16 x, long16 y);\n" |
| 36695 | "ulong16 __ovld __cnfn sub_sat(ulong16 x, ulong16 y);\n" |
| 36696 | "\n" |
| 36697 | "/**\n" |
| 36698 | " * result[i] = ((short)hi[i] << 8) | lo[i]\n" |
| 36699 | " * result[i] = ((ushort)hi[i] << 8) | lo[i]\n" |
| 36700 | " */\n" |
| 36701 | "short __ovld __cnfn upsample(char hi, uchar lo);\n" |
| 36702 | "ushort __ovld __cnfn upsample(uchar hi, uchar lo);\n" |
| 36703 | "short2 __ovld __cnfn upsample(char2 hi, uchar2 lo);\n" |
| 36704 | "short3 __ovld __cnfn upsample(char3 hi, uchar3 lo);\n" |
| 36705 | "short4 __ovld __cnfn upsample(char4 hi, uchar4 lo);\n" |
| 36706 | "short8 __ovld __cnfn upsample(char8 hi, uchar8 lo);\n" |
| 36707 | "short16 __ovld __cnfn upsample(char16 hi, uchar16 lo);\n" |
| 36708 | "ushort2 __ovld __cnfn upsample(uchar2 hi, uchar2 lo);\n" |
| 36709 | "ushort3 __ovld __cnfn upsample(uchar3 hi, uchar3 lo);\n" |
| 36710 | "ushort4 __ovld __cnfn upsample(uchar4 hi, uchar4 lo);\n" |
| 36711 | "ushort8 __ovld __cnfn upsample(uchar8 hi, uchar8 lo);\n" |
| 36712 | "ushort16 __ovld __cnfn upsample(uchar16 hi, uchar16 lo);\n" |
| 36713 | "\n" |
| 36714 | "/**\n" |
| 36715 | " * result[i] = ((int)hi[i] << 16) | lo[i]\n" |
| 36716 | " * result[i] = ((uint)hi[i] << 16) | lo[i]\n" |
| 36717 | " */\n" |
| 36718 | "int __ovld __cnfn upsample(short hi, ushort lo);\n" |
| 36719 | "uint __ovld __cnfn upsample(ushort hi, ushort lo);\n" |
| 36720 | "int2 __ovld __cnfn upsample(short2 hi, ushort2 lo);\n" |
| 36721 | "int3 __ovld __cnfn upsample(short3 hi, ushort3 lo);\n" |
| 36722 | "int4 __ovld __cnfn upsample(short4 hi, ushort4 lo);\n" |
| 36723 | "int8 __ovld __cnfn upsample(short8 hi, ushort8 lo);\n" |
| 36724 | "int16 __ovld __cnfn upsample(short16 hi, ushort16 lo);\n" |
| 36725 | "uint2 __ovld __cnfn upsample(ushort2 hi, ushort2 lo);\n" |
| 36726 | "uint3 __ovld __cnfn upsample(ushort3 hi, ushort3 lo);\n" |
| 36727 | "uint4 __ovld __cnfn upsample(ushort4 hi, ushort4 lo);\n" |
| 36728 | "uint8 __ovld __cnfn upsample(ushort8 hi, ushort8 lo);\n" |
| 36729 | "uint16 __ovld __cnfn upsample(ushort16 hi, ushort16 lo);\n" |
| 36730 | "/**\n" |
| 36731 | " * result[i] = ((long)hi[i] << 32) | lo[i]\n" |
| 36732 | " * result[i] = ((ulong)hi[i] << 32) | lo[i]\n" |
| 36733 | " */\n" |
| 36734 | "long __ovld __cnfn upsample(int hi, uint lo);\n" |
| 36735 | "ulong __ovld __cnfn upsample(uint hi, uint lo);\n" |
| 36736 | "long2 __ovld __cnfn upsample(int2 hi, uint2 lo);\n" |
| 36737 | "long3 __ovld __cnfn upsample(int3 hi, uint3 lo);\n" |
| 36738 | "long4 __ovld __cnfn upsample(int4 hi, uint4 lo);\n" |
| 36739 | "long8 __ovld __cnfn upsample(int8 hi, uint8 lo);\n" |
| 36740 | "long16 __ovld __cnfn upsample(int16 hi, uint16 lo);\n" |
| 36741 | "ulong2 __ovld __cnfn upsample(uint2 hi, uint2 lo);\n" |
| 36742 | "ulong3 __ovld __cnfn upsample(uint3 hi, uint3 lo);\n" |
| 36743 | "ulong4 __ovld __cnfn upsample(uint4 hi, uint4 lo);\n" |
| 36744 | "ulong8 __ovld __cnfn upsample(uint8 hi, uint8 lo);\n" |
| 36745 | "ulong16 __ovld __cnfn upsample(uint16 hi, uint16 lo);\n" |
| 36746 | "\n" |
| 36747 | "/*\n" |
| 36748 | " * popcount(x): returns the number of set bit in x\n" |
| 36749 | " */\n" |
| 36750 | "char __ovld __cnfn popcount(char x);\n" |
| 36751 | "uchar __ovld __cnfn popcount(uchar x);\n" |
| 36752 | "char2 __ovld __cnfn popcount(char2 x);\n" |
| 36753 | "uchar2 __ovld __cnfn popcount(uchar2 x);\n" |
| 36754 | "char3 __ovld __cnfn popcount(char3 x);\n" |
| 36755 | "uchar3 __ovld __cnfn popcount(uchar3 x);\n" |
| 36756 | "char4 __ovld __cnfn popcount(char4 x);\n" |
| 36757 | "uchar4 __ovld __cnfn popcount(uchar4 x);\n" |
| 36758 | "char8 __ovld __cnfn popcount(char8 x);\n" |
| 36759 | "uchar8 __ovld __cnfn popcount(uchar8 x);\n" |
| 36760 | "char16 __ovld __cnfn popcount(char16 x);\n" |
| 36761 | "uchar16 __ovld __cnfn popcount(uchar16 x);\n" |
| 36762 | "short __ovld __cnfn popcount(short x);\n" |
| 36763 | "ushort __ovld __cnfn popcount(ushort x);\n" |
| 36764 | "short2 __ovld __cnfn popcount(short2 x);\n" |
| 36765 | "ushort2 __ovld __cnfn popcount(ushort2 x);\n" |
| 36766 | "short3 __ovld __cnfn popcount(short3 x);\n" |
| 36767 | "ushort3 __ovld __cnfn popcount(ushort3 x);\n" |
| 36768 | "short4 __ovld __cnfn popcount(short4 x);\n" |
| 36769 | "ushort4 __ovld __cnfn popcount(ushort4 x);\n" |
| 36770 | "short8 __ovld __cnfn popcount(short8 x);\n" |
| 36771 | "ushort8 __ovld __cnfn popcount(ushort8 x);\n" |
| 36772 | "short16 __ovld __cnfn popcount(short16 x);\n" |
| 36773 | "ushort16 __ovld __cnfn popcount(ushort16 x);\n" |
| 36774 | "int __ovld __cnfn popcount(int x);\n" |
| 36775 | "uint __ovld __cnfn popcount(uint x);\n" |
| 36776 | "int2 __ovld __cnfn popcount(int2 x);\n" |
| 36777 | "uint2 __ovld __cnfn popcount(uint2 x);\n" |
| 36778 | "int3 __ovld __cnfn popcount(int3 x);\n" |
| 36779 | "uint3 __ovld __cnfn popcount(uint3 x);\n" |
| 36780 | "int4 __ovld __cnfn popcount(int4 x);\n" |
| 36781 | "uint4 __ovld __cnfn popcount(uint4 x);\n" |
| 36782 | "int8 __ovld __cnfn popcount(int8 x);\n" |
| 36783 | "uint8 __ovld __cnfn popcount(uint8 x);\n" |
| 36784 | "int16 __ovld __cnfn popcount(int16 x);\n" |
| 36785 | "uint16 __ovld __cnfn popcount(uint16 x);\n" |
| 36786 | "long __ovld __cnfn popcount(long x);\n" |
| 36787 | "ulong __ovld __cnfn popcount(ulong x);\n" |
| 36788 | "long2 __ovld __cnfn popcount(long2 x);\n" |
| 36789 | "ulong2 __ovld __cnfn popcount(ulong2 x);\n" |
| 36790 | "long3 __ovld __cnfn popcount(long3 x);\n" |
| 36791 | "ulong3 __ovld __cnfn popcount(ulong3 x);\n" |
| 36792 | "long4 __ovld __cnfn popcount(long4 x);\n" |
| 36793 | "ulong4 __ovld __cnfn popcount(ulong4 x);\n" |
| 36794 | "long8 __ovld __cnfn popcount(long8 x);\n" |
| 36795 | "ulong8 __ovld __cnfn popcount(ulong8 x);\n" |
| 36796 | "long16 __ovld __cnfn popcount(long16 x);\n" |
| 36797 | "ulong16 __ovld __cnfn popcount(ulong16 x);\n" |
| 36798 | "\n" |
| 36799 | "/**\n" |
| 36800 | " * Multiply two 24-bit integer values x and y and add\n" |
| 36801 | " * the 32-bit integer result to the 32-bit integer z.\n" |
| 36802 | " * Refer to definition of mul24 to see how the 24-bit\n" |
| 36803 | " * integer multiplication is performed.\n" |
| 36804 | " */\n" |
| 36805 | "int __ovld __cnfn mad24(int x, int y, int z);\n" |
| 36806 | "uint __ovld __cnfn mad24(uint x, uint y, uint z);\n" |
| 36807 | "int2 __ovld __cnfn mad24(int2 x, int2 y, int2 z);\n" |
| 36808 | "uint2 __ovld __cnfn mad24(uint2 x, uint2 y, uint2 z);\n" |
| 36809 | "int3 __ovld __cnfn mad24(int3 x, int3 y, int3 z);\n" |
| 36810 | "uint3 __ovld __cnfn mad24(uint3 x, uint3 y, uint3 z);\n" |
| 36811 | "int4 __ovld __cnfn mad24(int4 x, int4 y, int4 z);\n" |
| 36812 | "uint4 __ovld __cnfn mad24(uint4 x, uint4 y, uint4 z);\n" |
| 36813 | "int8 __ovld __cnfn mad24(int8 x, int8 y, int8 z);\n" |
| 36814 | "uint8 __ovld __cnfn mad24(uint8 x, uint8 y, uint8 z);\n" |
| 36815 | "int16 __ovld __cnfn mad24(int16 x, int16 y, int16 z);\n" |
| 36816 | "uint16 __ovld __cnfn mad24(uint16 x, uint16 y, uint16 z);\n" |
| 36817 | "\n" |
| 36818 | "/**\n" |
| 36819 | " * Multiply two 24-bit integer values x and y. x and y\n" |
| 36820 | " * are 32-bit integers but only the low 24-bits are used\n" |
| 36821 | " * to perform the multiplication. mul24 should only\n" |
| 36822 | " * be used when values in x and y are in the range [-\n" |
| 36823 | " * 2^23, 2^23-1] if x and y are signed integers and in the\n" |
| 36824 | " * range [0, 2^24-1] if x and y are unsigned integers. If\n" |
| 36825 | " * x and y are not in this range, the multiplication\n" |
| 36826 | " * result is implementation-defined.\n" |
| 36827 | " */\n" |
| 36828 | "int __ovld __cnfn mul24(int x, int y);\n" |
| 36829 | "uint __ovld __cnfn mul24(uint x, uint y);\n" |
| 36830 | "int2 __ovld __cnfn mul24(int2 x, int2 y);\n" |
| 36831 | "uint2 __ovld __cnfn mul24(uint2 x, uint2 y);\n" |
| 36832 | "int3 __ovld __cnfn mul24(int3 x, int3 y);\n" |
| 36833 | "uint3 __ovld __cnfn mul24(uint3 x, uint3 y);\n" |
| 36834 | "int4 __ovld __cnfn mul24(int4 x, int4 y);\n" |
| 36835 | "uint4 __ovld __cnfn mul24(uint4 x, uint4 y);\n" |
| 36836 | "int8 __ovld __cnfn mul24(int8 x, int8 y);\n" |
| 36837 | "uint8 __ovld __cnfn mul24(uint8 x, uint8 y);\n" |
| 36838 | "int16 __ovld __cnfn mul24(int16 x, int16 y);\n" |
| 36839 | "uint16 __ovld __cnfn mul24(uint16 x, uint16 y);\n" |
| 36840 | "\n" |
| 36841 | "// OpenCL v1.1 s6.11.4, v1.2 s6.12.4, v2.0 s6.13.4 - Common Functions\n" |
| 36842 | "\n" |
| 36843 | "/**\n" |
| 36844 | " * Returns fmin(fmax(x, minval), maxval).\n" |
| 36845 | " * Results are undefined if minval > maxval.\n" |
| 36846 | " */\n" |
| 36847 | "float __ovld __cnfn clamp(float x, float minval, float maxval);\n" |
| 36848 | "float2 __ovld __cnfn clamp(float2 x, float2 minval, float2 maxval);\n" |
| 36849 | "float3 __ovld __cnfn clamp(float3 x, float3 minval, float3 maxval);\n" |
| 36850 | "float4 __ovld __cnfn clamp(float4 x, float4 minval, float4 maxval);\n" |
| 36851 | "float8 __ovld __cnfn clamp(float8 x, float8 minval, float8 maxval);\n" |
| 36852 | "float16 __ovld __cnfn clamp(float16 x, float16 minval, float16 maxval);\n" |
| 36853 | "float2 __ovld __cnfn clamp(float2 x, float minval, float maxval);\n" |
| 36854 | "float3 __ovld __cnfn clamp(float3 x, float minval, float maxval);\n" |
| 36855 | "float4 __ovld __cnfn clamp(float4 x, float minval, float maxval);\n" |
| 36856 | "float8 __ovld __cnfn clamp(float8 x, float minval, float maxval);\n" |
| 36857 | "float16 __ovld __cnfn clamp(float16 x, float minval, float maxval);\n" |
| 36858 | "#ifdef cl_khr_fp64\n" |
| 36859 | "double __ovld __cnfn clamp(double x, double minval, double maxval);\n" |
| 36860 | "double2 __ovld __cnfn clamp(double2 x, double2 minval, double2 maxval);\n" |
| 36861 | "double3 __ovld __cnfn clamp(double3 x, double3 minval, double3 maxval);\n" |
| 36862 | "double4 __ovld __cnfn clamp(double4 x, double4 minval, double4 maxval);\n" |
| 36863 | "double8 __ovld __cnfn clamp(double8 x, double8 minval, double8 maxval);\n" |
| 36864 | "double16 __ovld __cnfn clamp(double16 x, double16 minval, double16 maxval);\n" |
| 36865 | "double2 __ovld __cnfn clamp(double2 x, double minval, double maxval);\n" |
| 36866 | "double3 __ovld __cnfn clamp(double3 x, double minval, double maxval);\n" |
| 36867 | "double4 __ovld __cnfn clamp(double4 x, double minval, double maxval);\n" |
| 36868 | "double8 __ovld __cnfn clamp(double8 x, double minval, double maxval);\n" |
| 36869 | "double16 __ovld __cnfn clamp(double16 x, double minval, double maxval);\n" |
| 36870 | "#endif //cl_khr_fp64\n" |
| 36871 | "#ifdef cl_khr_fp16\n" |
| 36872 | "half __ovld __cnfn clamp(half x, half minval, half maxval);\n" |
| 36873 | "half2 __ovld __cnfn clamp(half2 x, half2 minval, half2 maxval);\n" |
| 36874 | "half3 __ovld __cnfn clamp(half3 x, half3 minval, half3 maxval);\n" |
| 36875 | "half4 __ovld __cnfn clamp(half4 x, half4 minval, half4 maxval);\n" |
| 36876 | "half8 __ovld __cnfn clamp(half8 x, half8 minval, half8 maxval);\n" |
| 36877 | "half16 __ovld __cnfn clamp(half16 x, half16 minval, half16 maxval);\n" |
| 36878 | "half2 __ovld __cnfn clamp(half2 x, half minval, half maxval);\n" |
| 36879 | "half3 __ovld __cnfn clamp(half3 x, half minval, half maxval);\n" |
| 36880 | "half4 __ovld __cnfn clamp(half4 x, half minval, half maxval);\n" |
| 36881 | "half8 __ovld __cnfn clamp(half8 x, half minval, half maxval);\n" |
| 36882 | "half16 __ovld __cnfn clamp(half16 x, half minval, half maxval);\n" |
| 36883 | "#endif //cl_khr_fp16\n" |
| 36884 | "\n" |
| 36885 | "/**\n" |
| 36886 | " * Converts radians to degrees, i.e. (180 / PI) *\n" |
| 36887 | " * radians.\n" |
| 36888 | " */\n" |
| 36889 | "float __ovld __cnfn degrees(float radians);\n" |
| 36890 | "float2 __ovld __cnfn degrees(float2 radians);\n" |
| 36891 | "float3 __ovld __cnfn degrees(float3 radians);\n" |
| 36892 | "float4 __ovld __cnfn degrees(float4 radians);\n" |
| 36893 | "float8 __ovld __cnfn degrees(float8 radians);\n" |
| 36894 | "float16 __ovld __cnfn degrees(float16 radians);\n" |
| 36895 | "#ifdef cl_khr_fp64\n" |
| 36896 | "double __ovld __cnfn degrees(double radians);\n" |
| 36897 | "double2 __ovld __cnfn degrees(double2 radians);\n" |
| 36898 | "double3 __ovld __cnfn degrees(double3 radians);\n" |
| 36899 | "double4 __ovld __cnfn degrees(double4 radians);\n" |
| 36900 | "double8 __ovld __cnfn degrees(double8 radians);\n" |
| 36901 | "double16 __ovld __cnfn degrees(double16 radians);\n" |
| 36902 | "#endif //cl_khr_fp64\n" |
| 36903 | "#ifdef cl_khr_fp16\n" |
| 36904 | "half __ovld __cnfn degrees(half radians);\n" |
| 36905 | "half2 __ovld __cnfn degrees(half2 radians);\n" |
| 36906 | "half3 __ovld __cnfn degrees(half3 radians);\n" |
| 36907 | "half4 __ovld __cnfn degrees(half4 radians);\n" |
| 36908 | "half8 __ovld __cnfn degrees(half8 radians);\n" |
| 36909 | "half16 __ovld __cnfn degrees(half16 radians);\n" |
| 36910 | "#endif //cl_khr_fp16\n" |
| 36911 | "\n" |
| 36912 | "/**\n" |
| 36913 | " * Returns y if x < y, otherwise it returns x. If x and y\n" |
| 36914 | " * are infinite or NaN, the return values are undefined.\n" |
| 36915 | " */\n" |
| 36916 | "float __ovld __cnfn max(float x, float y);\n" |
| 36917 | "float2 __ovld __cnfn max(float2 x, float2 y);\n" |
| 36918 | "float3 __ovld __cnfn max(float3 x, float3 y);\n" |
| 36919 | "float4 __ovld __cnfn max(float4 x, float4 y);\n" |
| 36920 | "float8 __ovld __cnfn max(float8 x, float8 y);\n" |
| 36921 | "float16 __ovld __cnfn max(float16 x, float16 y);\n" |
| 36922 | "float2 __ovld __cnfn max(float2 x, float y);\n" |
| 36923 | "float3 __ovld __cnfn max(float3 x, float y);\n" |
| 36924 | "float4 __ovld __cnfn max(float4 x, float y);\n" |
| 36925 | "float8 __ovld __cnfn max(float8 x, float y);\n" |
| 36926 | "float16 __ovld __cnfn max(float16 x, float y);\n" |
| 36927 | "#ifdef cl_khr_fp64\n" |
| 36928 | "double __ovld __cnfn max(double x, double y);\n" |
| 36929 | "double2 __ovld __cnfn max(double2 x, double2 y);\n" |
| 36930 | "double3 __ovld __cnfn max(double3 x, double3 y);\n" |
| 36931 | "double4 __ovld __cnfn max(double4 x, double4 y);\n" |
| 36932 | "double8 __ovld __cnfn max(double8 x, double8 y);\n" |
| 36933 | "double16 __ovld __cnfn max(double16 x, double16 y);\n" |
| 36934 | "double2 __ovld __cnfn max(double2 x, double y);\n" |
| 36935 | "double3 __ovld __cnfn max(double3 x, double y);\n" |
| 36936 | "double4 __ovld __cnfn max(double4 x, double y);\n" |
| 36937 | "double8 __ovld __cnfn max(double8 x, double y);\n" |
| 36938 | "double16 __ovld __cnfn max(double16 x, double y);\n" |
| 36939 | "#endif //cl_khr_fp64\n" |
| 36940 | "#ifdef cl_khr_fp16\n" |
| 36941 | "half __ovld __cnfn max(half x, half y);\n" |
| 36942 | "half2 __ovld __cnfn max(half2 x, half2 y);\n" |
| 36943 | "half3 __ovld __cnfn max(half3 x, half3 y);\n" |
| 36944 | "half4 __ovld __cnfn max(half4 x, half4 y);\n" |
| 36945 | "half8 __ovld __cnfn max(half8 x, half8 y);\n" |
| 36946 | "half16 __ovld __cnfn max(half16 x, half16 y);\n" |
| 36947 | "half2 __ovld __cnfn max(half2 x, half y);\n" |
| 36948 | "half3 __ovld __cnfn max(half3 x, half y);\n" |
| 36949 | "half4 __ovld __cnfn max(half4 x, half y);\n" |
| 36950 | "half8 __ovld __cnfn max(half8 x, half y);\n" |
| 36951 | "half16 __ovld __cnfn max(half16 x, half y);\n" |
| 36952 | "#endif //cl_khr_fp16\n" |
| 36953 | "\n" |
| 36954 | "/**\n" |
| 36955 | " * Returns y if y < x, otherwise it returns x. If x and y\n" |
| 36956 | " * are infinite or NaN, the return values are undefined.\n" |
| 36957 | " */\n" |
| 36958 | "float __ovld __cnfn min(float x, float y);\n" |
| 36959 | "float2 __ovld __cnfn min(float2 x, float2 y);\n" |
| 36960 | "float3 __ovld __cnfn min(float3 x, float3 y);\n" |
| 36961 | "float4 __ovld __cnfn min(float4 x, float4 y);\n" |
| 36962 | "float8 __ovld __cnfn min(float8 x, float8 y);\n" |
| 36963 | "float16 __ovld __cnfn min(float16 x, float16 y);\n" |
| 36964 | "float2 __ovld __cnfn min(float2 x, float y);\n" |
| 36965 | "float3 __ovld __cnfn min(float3 x, float y);\n" |
| 36966 | "float4 __ovld __cnfn min(float4 x, float y);\n" |
| 36967 | "float8 __ovld __cnfn min(float8 x, float y);\n" |
| 36968 | "float16 __ovld __cnfn min(float16 x, float y);\n" |
| 36969 | "#ifdef cl_khr_fp64\n" |
| 36970 | "double __ovld __cnfn min(double x, double y);\n" |
| 36971 | "double2 __ovld __cnfn min(double2 x, double2 y);\n" |
| 36972 | "double3 __ovld __cnfn min(double3 x, double3 y);\n" |
| 36973 | "double4 __ovld __cnfn min(double4 x, double4 y);\n" |
| 36974 | "double8 __ovld __cnfn min(double8 x, double8 y);\n" |
| 36975 | "double16 __ovld __cnfn min(double16 x, double16 y);\n" |
| 36976 | "double2 __ovld __cnfn min(double2 x, double y);\n" |
| 36977 | "double3 __ovld __cnfn min(double3 x, double y);\n" |
| 36978 | "double4 __ovld __cnfn min(double4 x, double y);\n" |
| 36979 | "double8 __ovld __cnfn min(double8 x, double y);\n" |
| 36980 | "double16 __ovld __cnfn min(double16 x, double y);\n" |
| 36981 | "#endif //cl_khr_fp64\n" |
| 36982 | "#ifdef cl_khr_fp16\n" |
| 36983 | "half __ovld __cnfn min(half x, half y);\n" |
| 36984 | "half2 __ovld __cnfn min(half2 x, half2 y);\n" |
| 36985 | "half3 __ovld __cnfn min(half3 x, half3 y);\n" |
| 36986 | "half4 __ovld __cnfn min(half4 x, half4 y);\n" |
| 36987 | "half8 __ovld __cnfn min(half8 x, half8 y);\n" |
| 36988 | "half16 __ovld __cnfn min(half16 x, half16 y);\n" |
| 36989 | "half2 __ovld __cnfn min(half2 x, half y);\n" |
| 36990 | "half3 __ovld __cnfn min(half3 x, half y);\n" |
| 36991 | "half4 __ovld __cnfn min(half4 x, half y);\n" |
| 36992 | "half8 __ovld __cnfn min(half8 x, half y);\n" |
| 36993 | "half16 __ovld __cnfn min(half16 x, half y);\n" |
| 36994 | "#endif //cl_khr_fp16\n" |
| 36995 | "\n" |
| 36996 | "/**\n" |
| 36997 | " * Returns the linear blend of x & y implemented as:\n" |
| 36998 | " * x + (y - x) * a\n" |
| 36999 | " * a must be a value in the range 0.0 ... 1.0. If a is not\n" |
| 37000 | " * in the range 0.0 ... 1.0, the return values are\n" |
| 37001 | " * undefined.\n" |
| 37002 | " */\n" |
| 37003 | "float __ovld __cnfn mix(float x, float y, float a);\n" |
| 37004 | "float2 __ovld __cnfn mix(float2 x, float2 y, float2 a);\n" |
| 37005 | "float3 __ovld __cnfn mix(float3 x, float3 y, float3 a);\n" |
| 37006 | "float4 __ovld __cnfn mix(float4 x, float4 y, float4 a);\n" |
| 37007 | "float8 __ovld __cnfn mix(float8 x, float8 y, float8 a);\n" |
| 37008 | "float16 __ovld __cnfn mix(float16 x, float16 y, float16 a);\n" |
| 37009 | "float2 __ovld __cnfn mix(float2 x, float2 y, float a);\n" |
| 37010 | "float3 __ovld __cnfn mix(float3 x, float3 y, float a);\n" |
| 37011 | "float4 __ovld __cnfn mix(float4 x, float4 y, float a);\n" |
| 37012 | "float8 __ovld __cnfn mix(float8 x, float8 y, float a);\n" |
| 37013 | "float16 __ovld __cnfn mix(float16 x, float16 y, float a);\n" |
| 37014 | "#ifdef cl_khr_fp64\n" |
| 37015 | "double __ovld __cnfn mix(double x, double y, double a);\n" |
| 37016 | "double2 __ovld __cnfn mix(double2 x, double2 y, double2 a);\n" |
| 37017 | "double3 __ovld __cnfn mix(double3 x, double3 y, double3 a);\n" |
| 37018 | "double4 __ovld __cnfn mix(double4 x, double4 y, double4 a);\n" |
| 37019 | "double8 __ovld __cnfn mix(double8 x, double8 y, double8 a);\n" |
| 37020 | "double16 __ovld __cnfn mix(double16 x, double16 y, double16 a);\n" |
| 37021 | "double2 __ovld __cnfn mix(double2 x, double2 y, double a);\n" |
| 37022 | "double3 __ovld __cnfn mix(double3 x, double3 y, double a);\n" |
| 37023 | "double4 __ovld __cnfn mix(double4 x, double4 y, double a);\n" |
| 37024 | "double8 __ovld __cnfn mix(double8 x, double8 y, double a);\n" |
| 37025 | "double16 __ovld __cnfn mix(double16 x, double16 y, double a);\n" |
| 37026 | "#endif //cl_khr_fp64\n" |
| 37027 | "#ifdef cl_khr_fp16\n" |
| 37028 | "half __ovld __cnfn mix(half x, half y, half a);\n" |
| 37029 | "half2 __ovld __cnfn mix(half2 x, half2 y, half2 a);\n" |
| 37030 | "half3 __ovld __cnfn mix(half3 x, half3 y, half3 a);\n" |
| 37031 | "half4 __ovld __cnfn mix(half4 x, half4 y, half4 a);\n" |
| 37032 | "half8 __ovld __cnfn mix(half8 x, half8 y, half8 a);\n" |
| 37033 | "half16 __ovld __cnfn mix(half16 x, half16 y, half16 a);\n" |
| 37034 | "half2 __ovld __cnfn mix(half2 x, half2 y, half a);\n" |
| 37035 | "half3 __ovld __cnfn mix(half3 x, half3 y, half a);\n" |
| 37036 | "half4 __ovld __cnfn mix(half4 x, half4 y, half a);\n" |
| 37037 | "half8 __ovld __cnfn mix(half8 x, half8 y, half a);\n" |
| 37038 | "half16 __ovld __cnfn mix(half16 x, half16 y, half a);\n" |
| 37039 | "#endif //cl_khr_fp16\n" |
| 37040 | "\n" |
| 37041 | "/**\n" |
| 37042 | " * Converts degrees to radians, i.e. (PI / 180) *\n" |
| 37043 | " * degrees.\n" |
| 37044 | " */\n" |
| 37045 | "float __ovld __cnfn radians(float degrees);\n" |
| 37046 | "float2 __ovld __cnfn radians(float2 degrees);\n" |
| 37047 | "float3 __ovld __cnfn radians(float3 degrees);\n" |
| 37048 | "float4 __ovld __cnfn radians(float4 degrees);\n" |
| 37049 | "float8 __ovld __cnfn radians(float8 degrees);\n" |
| 37050 | "float16 __ovld __cnfn radians(float16 degrees);\n" |
| 37051 | "#ifdef cl_khr_fp64\n" |
| 37052 | "double __ovld __cnfn radians(double degrees);\n" |
| 37053 | "double2 __ovld __cnfn radians(double2 degrees);\n" |
| 37054 | "double3 __ovld __cnfn radians(double3 degrees);\n" |
| 37055 | "double4 __ovld __cnfn radians(double4 degrees);\n" |
| 37056 | "double8 __ovld __cnfn radians(double8 degrees);\n" |
| 37057 | "double16 __ovld __cnfn radians(double16 degrees);\n" |
| 37058 | "#endif //cl_khr_fp64\n" |
| 37059 | "#ifdef cl_khr_fp16\n" |
| 37060 | "half __ovld __cnfn radians(half degrees);\n" |
| 37061 | "half2 __ovld __cnfn radians(half2 degrees);\n" |
| 37062 | "half3 __ovld __cnfn radians(half3 degrees);\n" |
| 37063 | "half4 __ovld __cnfn radians(half4 degrees);\n" |
| 37064 | "half8 __ovld __cnfn radians(half8 degrees);\n" |
| 37065 | "half16 __ovld __cnfn radians(half16 degrees);\n" |
| 37066 | "#endif //cl_khr_fp16\n" |
| 37067 | "\n" |
| 37068 | "/**\n" |
| 37069 | " * Returns 0.0 if x < edge, otherwise it returns 1.0.\n" |
| 37070 | " */\n" |
| 37071 | "float __ovld __cnfn step(float edge, float x);\n" |
| 37072 | "float2 __ovld __cnfn step(float2 edge, float2 x);\n" |
| 37073 | "float3 __ovld __cnfn step(float3 edge, float3 x);\n" |
| 37074 | "float4 __ovld __cnfn step(float4 edge, float4 x);\n" |
| 37075 | "float8 __ovld __cnfn step(float8 edge, float8 x);\n" |
| 37076 | "float16 __ovld __cnfn step(float16 edge, float16 x);\n" |
| 37077 | "float2 __ovld __cnfn step(float edge, float2 x);\n" |
| 37078 | "float3 __ovld __cnfn step(float edge, float3 x);\n" |
| 37079 | "float4 __ovld __cnfn step(float edge, float4 x);\n" |
| 37080 | "float8 __ovld __cnfn step(float edge, float8 x);\n" |
| 37081 | "float16 __ovld __cnfn step(float edge, float16 x);\n" |
| 37082 | "#ifdef cl_khr_fp64\n" |
| 37083 | "double __ovld __cnfn step(double edge, double x);\n" |
| 37084 | "double2 __ovld __cnfn step(double2 edge, double2 x);\n" |
| 37085 | "double3 __ovld __cnfn step(double3 edge, double3 x);\n" |
| 37086 | "double4 __ovld __cnfn step(double4 edge, double4 x);\n" |
| 37087 | "double8 __ovld __cnfn step(double8 edge, double8 x);\n" |
| 37088 | "double16 __ovld __cnfn step(double16 edge, double16 x);\n" |
| 37089 | "double2 __ovld __cnfn step(double edge, double2 x);\n" |
| 37090 | "double3 __ovld __cnfn step(double edge, double3 x);\n" |
| 37091 | "double4 __ovld __cnfn step(double edge, double4 x);\n" |
| 37092 | "double8 __ovld __cnfn step(double edge, double8 x);\n" |
| 37093 | "double16 __ovld __cnfn step(double edge, double16 x);\n" |
| 37094 | "#endif //cl_khr_fp64\n" |
| 37095 | "#ifdef cl_khr_fp16\n" |
| 37096 | "half __ovld __cnfn step(half edge, half x);\n" |
| 37097 | "half2 __ovld __cnfn step(half2 edge, half2 x);\n" |
| 37098 | "half3 __ovld __cnfn step(half3 edge, half3 x);\n" |
| 37099 | "half4 __ovld __cnfn step(half4 edge, half4 x);\n" |
| 37100 | "half8 __ovld __cnfn step(half8 edge, half8 x);\n" |
| 37101 | "half16 __ovld __cnfn step(half16 edge, half16 x);\n" |
| 37102 | "half __ovld __cnfn step(half edge, half x);\n" |
| 37103 | "half2 __ovld __cnfn step(half edge, half2 x);\n" |
| 37104 | "half3 __ovld __cnfn step(half edge, half3 x);\n" |
| 37105 | "half4 __ovld __cnfn step(half edge, half4 x);\n" |
| 37106 | "half8 __ovld __cnfn step(half edge, half8 x);\n" |
| 37107 | "half16 __ovld __cnfn step(half edge, half16 x);\n" |
| 37108 | "#endif //cl_khr_fp16\n" |
| 37109 | "\n" |
| 37110 | "/**\n" |
| 37111 | " * Returns 0.0 if x <= edge0 and 1.0 if x >= edge1 and\n" |
| 37112 | " * performs smooth Hermite interpolation between 0\n" |
| 37113 | " * and 1when edge0 < x < edge1. This is useful in\n" |
| 37114 | " * cases where you would want a threshold function\n" |
| 37115 | " * with a smooth transition.\n" |
| 37116 | " * This is equivalent to:\n" |
| 37117 | " * gentype t;\n" |
| 37118 | " * t = clamp ((x - edge0) / (edge1 - edge0), 0, 1);\n" |
| 37119 | " * return t * t * (3 - 2 * t);\n" |
| 37120 | " * Results are undefined if edge0 >= edge1 or if x,\n" |
| 37121 | " * edge0 or edge1 is a NaN.\n" |
| 37122 | " */\n" |
| 37123 | "float __ovld __cnfn smoothstep(float edge0, float edge1, float x);\n" |
| 37124 | "float2 __ovld __cnfn smoothstep(float2 edge0, float2 edge1, float2 x);\n" |
| 37125 | "float3 __ovld __cnfn smoothstep(float3 edge0, float3 edge1, float3 x);\n" |
| 37126 | "float4 __ovld __cnfn smoothstep(float4 edge0, float4 edge1, float4 x);\n" |
| 37127 | "float8 __ovld __cnfn smoothstep(float8 edge0, float8 edge1, float8 x);\n" |
| 37128 | "float16 __ovld __cnfn smoothstep(float16 edge0, float16 edge1, float16 x);\n" |
| 37129 | "float2 __ovld __cnfn smoothstep(float edge0, float edge1, float2 x);\n" |
| 37130 | "float3 __ovld __cnfn smoothstep(float edge0, float edge1, float3 x);\n" |
| 37131 | "float4 __ovld __cnfn smoothstep(float edge0, float edge1, float4 x);\n" |
| 37132 | "float8 __ovld __cnfn smoothstep(float edge0, float edge1, float8 x);\n" |
| 37133 | "float16 __ovld __cnfn smoothstep(float edge0, float edge1, float16 x);\n" |
| 37134 | "#ifdef cl_khr_fp64\n" |
| 37135 | "double __ovld __cnfn smoothstep(double edge0, double edge1, double x);\n" |
| 37136 | "double2 __ovld __cnfn smoothstep(double2 edge0, double2 edge1, double2 x);\n" |
| 37137 | "double3 __ovld __cnfn smoothstep(double3 edge0, double3 edge1, double3 x);\n" |
| 37138 | "double4 __ovld __cnfn smoothstep(double4 edge0, double4 edge1, double4 x);\n" |
| 37139 | "double8 __ovld __cnfn smoothstep(double8 edge0, double8 edge1, double8 x);\n" |
| 37140 | "double16 __ovld __cnfn smoothstep(double16 edge0, double16 edge1, double16 x);\n" |
| 37141 | "double2 __ovld __cnfn smoothstep(double edge0, double edge1, double2 x);\n" |
| 37142 | "double3 __ovld __cnfn smoothstep(double edge0, double edge1, double3 x);\n" |
| 37143 | "double4 __ovld __cnfn smoothstep(double edge0, double edge1, double4 x);\n" |
| 37144 | "double8 __ovld __cnfn smoothstep(double edge0, double edge1, double8 x);\n" |
| 37145 | "double16 __ovld __cnfn smoothstep(double edge0, double edge1, double16 x);\n" |
| 37146 | "#endif //cl_khr_fp64\n" |
| 37147 | "#ifdef cl_khr_fp16\n" |
| 37148 | "half __ovld __cnfn smoothstep(half edge0, half edge1, half x);\n" |
| 37149 | "half2 __ovld __cnfn smoothstep(half2 edge0, half2 edge1, half2 x);\n" |
| 37150 | "half3 __ovld __cnfn smoothstep(half3 edge0, half3 edge1, half3 x);\n" |
| 37151 | "half4 __ovld __cnfn smoothstep(half4 edge0, half4 edge1, half4 x);\n" |
| 37152 | "half8 __ovld __cnfn smoothstep(half8 edge0, half8 edge1, half8 x);\n" |
| 37153 | "half16 __ovld __cnfn smoothstep(half16 edge0, half16 edge1, half16 x);\n" |
| 37154 | "half __ovld __cnfn smoothstep(half edge0, half edge1, half x);\n" |
| 37155 | "half2 __ovld __cnfn smoothstep(half edge0, half edge1, half2 x);\n" |
| 37156 | "half3 __ovld __cnfn smoothstep(half edge0, half edge1, half3 x);\n" |
| 37157 | "half4 __ovld __cnfn smoothstep(half edge0, half edge1, half4 x);\n" |
| 37158 | "half8 __ovld __cnfn smoothstep(half edge0, half edge1, half8 x);\n" |
| 37159 | "half16 __ovld __cnfn smoothstep(half edge0, half edge1, half16 x);\n" |
| 37160 | "#endif //cl_khr_fp16\n" |
| 37161 | "\n" |
| 37162 | "/**\n" |
| 37163 | " * Returns 1.0 if x > 0, -0.0 if x = -0.0, +0.0 if x =\n" |
| 37164 | " * +0.0, or -1.0 if x < 0. Returns 0.0 if x is a NaN.\n" |
| 37165 | " */\n" |
| 37166 | "float __ovld __cnfn sign(float x);\n" |
| 37167 | "float2 __ovld __cnfn sign(float2 x);\n" |
| 37168 | "float3 __ovld __cnfn sign(float3 x);\n" |
| 37169 | "float4 __ovld __cnfn sign(float4 x);\n" |
| 37170 | "float8 __ovld __cnfn sign(float8 x);\n" |
| 37171 | "float16 __ovld __cnfn sign(float16 x);\n" |
| 37172 | "#ifdef cl_khr_fp64\n" |
| 37173 | "double __ovld __cnfn sign(double x);\n" |
| 37174 | "double2 __ovld __cnfn sign(double2 x);\n" |
| 37175 | "double3 __ovld __cnfn sign(double3 x);\n" |
| 37176 | "double4 __ovld __cnfn sign(double4 x);\n" |
| 37177 | "double8 __ovld __cnfn sign(double8 x);\n" |
| 37178 | "double16 __ovld __cnfn sign(double16 x);\n" |
| 37179 | "#endif //cl_khr_fp64\n" |
| 37180 | "#ifdef cl_khr_fp16\n" |
| 37181 | "half __ovld __cnfn sign(half x);\n" |
| 37182 | "half2 __ovld __cnfn sign(half2 x);\n" |
| 37183 | "half3 __ovld __cnfn sign(half3 x);\n" |
| 37184 | "half4 __ovld __cnfn sign(half4 x);\n" |
| 37185 | "half8 __ovld __cnfn sign(half8 x);\n" |
| 37186 | "half16 __ovld __cnfn sign(half16 x);\n" |
| 37187 | "#endif //cl_khr_fp16\n" |
| 37188 | "\n" |
| 37189 | "// OpenCL v1.1 s6.11.5, v1.2 s6.12.5, v2.0 s6.13.5 - Geometric Functions\n" |
| 37190 | "\n" |
| 37191 | "/**\n" |
| 37192 | " * Returns the cross product of p0.xyz and p1.xyz. The\n" |
| 37193 | " * w component of float4 result returned will be 0.0.\n" |
| 37194 | " */\n" |
| 37195 | "float4 __ovld __cnfn cross(float4 p0, float4 p1);\n" |
| 37196 | "float3 __ovld __cnfn cross(float3 p0, float3 p1);\n" |
| 37197 | "#ifdef cl_khr_fp64\n" |
| 37198 | "double4 __ovld __cnfn cross(double4 p0, double4 p1);\n" |
| 37199 | "double3 __ovld __cnfn cross(double3 p0, double3 p1);\n" |
| 37200 | "#endif //cl_khr_fp64\n" |
| 37201 | "#ifdef cl_khr_fp16\n" |
| 37202 | "half4 __ovld __cnfn cross(half4 p0, half4 p1);\n" |
| 37203 | "half3 __ovld __cnfn cross(half3 p0, half3 p1);\n" |
| 37204 | "#endif //cl_khr_fp16\n" |
| 37205 | "\n" |
| 37206 | "/**\n" |
| 37207 | " * Compute dot product.\n" |
| 37208 | " */\n" |
| 37209 | "float __ovld __cnfn dot(float p0, float p1);\n" |
| 37210 | "float __ovld __cnfn dot(float2 p0, float2 p1);\n" |
| 37211 | "float __ovld __cnfn dot(float3 p0, float3 p1);\n" |
| 37212 | "float __ovld __cnfn dot(float4 p0, float4 p1);\n" |
| 37213 | "#ifdef cl_khr_fp64\n" |
| 37214 | "double __ovld __cnfn dot(double p0, double p1);\n" |
| 37215 | "double __ovld __cnfn dot(double2 p0, double2 p1);\n" |
| 37216 | "double __ovld __cnfn dot(double3 p0, double3 p1);\n" |
| 37217 | "double __ovld __cnfn dot(double4 p0, double4 p1);\n" |
| 37218 | "#endif //cl_khr_fp64\n" |
| 37219 | "#ifdef cl_khr_fp16\n" |
| 37220 | "half __ovld __cnfn dot(half p0, half p1);\n" |
| 37221 | "half __ovld __cnfn dot(half2 p0, half2 p1);\n" |
| 37222 | "half __ovld __cnfn dot(half3 p0, half3 p1);\n" |
| 37223 | "half __ovld __cnfn dot(half4 p0, half4 p1);\n" |
| 37224 | "#endif //cl_khr_fp16\n" |
| 37225 | "\n" |
| 37226 | "/**\n" |
| 37227 | " * Returns the distance between p0 and p1. This is\n" |
| 37228 | " * calculated as length(p0 - p1).\n" |
| 37229 | " */\n" |
| 37230 | "float __ovld __cnfn distance(float p0, float p1);\n" |
| 37231 | "float __ovld __cnfn distance(float2 p0, float2 p1);\n" |
| 37232 | "float __ovld __cnfn distance(float3 p0, float3 p1);\n" |
| 37233 | "float __ovld __cnfn distance(float4 p0, float4 p1);\n" |
| 37234 | "#ifdef cl_khr_fp64\n" |
| 37235 | "double __ovld __cnfn distance(double p0, double p1);\n" |
| 37236 | "double __ovld __cnfn distance(double2 p0, double2 p1);\n" |
| 37237 | "double __ovld __cnfn distance(double3 p0, double3 p1);\n" |
| 37238 | "double __ovld __cnfn distance(double4 p0, double4 p1);\n" |
| 37239 | "#endif //cl_khr_fp64\n" |
| 37240 | "#ifdef cl_khr_fp16\n" |
| 37241 | "half __ovld __cnfn distance(half p0, half p1);\n" |
| 37242 | "half __ovld __cnfn distance(half2 p0, half2 p1);\n" |
| 37243 | "half __ovld __cnfn distance(half3 p0, half3 p1);\n" |
| 37244 | "half __ovld __cnfn distance(half4 p0, half4 p1);\n" |
| 37245 | "#endif //cl_khr_fp16\n" |
| 37246 | "\n" |
| 37247 | "/**\n" |
| 37248 | " * Return the length of vector p, i.e.,\n" |
| 37249 | " * sqrt(p.x2 + p.y 2 + ...)\n" |
| 37250 | " */\n" |
| 37251 | "float __ovld __cnfn length(float p);\n" |
| 37252 | "float __ovld __cnfn length(float2 p);\n" |
| 37253 | "float __ovld __cnfn length(float3 p);\n" |
| 37254 | "float __ovld __cnfn length(float4 p);\n" |
| 37255 | "#ifdef cl_khr_fp64\n" |
| 37256 | "double __ovld __cnfn length(double p);\n" |
| 37257 | "double __ovld __cnfn length(double2 p);\n" |
| 37258 | "double __ovld __cnfn length(double3 p);\n" |
| 37259 | "double __ovld __cnfn length(double4 p);\n" |
| 37260 | "#endif //cl_khr_fp64\n" |
| 37261 | "#ifdef cl_khr_fp16\n" |
| 37262 | "half __ovld __cnfn length(half p);\n" |
| 37263 | "half __ovld __cnfn length(half2 p);\n" |
| 37264 | "half __ovld __cnfn length(half3 p);\n" |
| 37265 | "half __ovld __cnfn length(half4 p);\n" |
| 37266 | "#endif //cl_khr_fp16\n" |
| 37267 | "\n" |
| 37268 | "/**\n" |
| 37269 | " * Returns a vector in the same direction as p but with a\n" |
| 37270 | " * length of 1.\n" |
| 37271 | " */\n" |
| 37272 | "float __ovld __cnfn normalize(float p);\n" |
| 37273 | "float2 __ovld __cnfn normalize(float2 p);\n" |
| 37274 | "float3 __ovld __cnfn normalize(float3 p);\n" |
| 37275 | "float4 __ovld __cnfn normalize(float4 p);\n" |
| 37276 | "#ifdef cl_khr_fp64\n" |
| 37277 | "double __ovld __cnfn normalize(double p);\n" |
| 37278 | "double2 __ovld __cnfn normalize(double2 p);\n" |
| 37279 | "double3 __ovld __cnfn normalize(double3 p);\n" |
| 37280 | "double4 __ovld __cnfn normalize(double4 p);\n" |
| 37281 | "#endif //cl_khr_fp64\n" |
| 37282 | "#ifdef cl_khr_fp16\n" |
| 37283 | "half __ovld __cnfn normalize(half p);\n" |
| 37284 | "half2 __ovld __cnfn normalize(half2 p);\n" |
| 37285 | "half3 __ovld __cnfn normalize(half3 p);\n" |
| 37286 | "half4 __ovld __cnfn normalize(half4 p);\n" |
| 37287 | "#endif //cl_khr_fp16\n" |
| 37288 | "\n" |
| 37289 | "/**\n" |
| 37290 | " * Returns fast_length(p0 - p1).\n" |
| 37291 | " */\n" |
| 37292 | "float __ovld __cnfn fast_distance(float p0, float p1);\n" |
| 37293 | "float __ovld __cnfn fast_distance(float2 p0, float2 p1);\n" |
| 37294 | "float __ovld __cnfn fast_distance(float3 p0, float3 p1);\n" |
| 37295 | "float __ovld __cnfn fast_distance(float4 p0, float4 p1);\n" |
| 37296 | "#ifdef cl_khr_fp16\n" |
| 37297 | "half __ovld __cnfn fast_distance(half p0, half p1);\n" |
| 37298 | "half __ovld __cnfn fast_distance(half2 p0, half2 p1);\n" |
| 37299 | "half __ovld __cnfn fast_distance(half3 p0, half3 p1);\n" |
| 37300 | "half __ovld __cnfn fast_distance(half4 p0, half4 p1);\n" |
| 37301 | "#endif //cl_khr_fp16\n" |
| 37302 | "\n" |
| 37303 | "/**\n" |
| 37304 | " * Returns the length of vector p computed as:\n" |
| 37305 | " * half_sqrt(p.x2 + p.y2 + ...)\n" |
| 37306 | " */\n" |
| 37307 | "float __ovld __cnfn fast_length(float p);\n" |
| 37308 | "float __ovld __cnfn fast_length(float2 p);\n" |
| 37309 | "float __ovld __cnfn fast_length(float3 p);\n" |
| 37310 | "float __ovld __cnfn fast_length(float4 p);\n" |
| 37311 | "#ifdef cl_khr_fp16\n" |
| 37312 | "half __ovld __cnfn fast_length(half p);\n" |
| 37313 | "half __ovld __cnfn fast_length(half2 p);\n" |
| 37314 | "half __ovld __cnfn fast_length(half3 p);\n" |
| 37315 | "half __ovld __cnfn fast_length(half4 p);\n" |
| 37316 | "#endif //cl_khr_fp16\n" |
| 37317 | "\n" |
| 37318 | "/**\n" |
| 37319 | " * Returns a vector in the same direction as p but with a\n" |
| 37320 | " * length of 1. fast_normalize is computed as:\n" |
| 37321 | " * p * half_rsqrt (p.x^2 + p.y^2 + ... )\n" |
| 37322 | " * The result shall be within 8192 ulps error from the\n" |
| 37323 | " * infinitely precise result of\n" |
| 37324 | " * if (all(p == 0.0f))\n" |
| 37325 | " * result = p;\n" |
| 37326 | " * else\n" |
| 37327 | " * result = p / sqrt (p.x^2 + p.y^2 + ...);\n" |
| 37328 | " * with the following exceptions:\n" |
| 37329 | " * 1) If the sum of squares is greater than FLT_MAX\n" |
| 37330 | " * then the value of the floating-point values in the\n" |
| 37331 | " * result vector are undefined.\n" |
| 37332 | " * 2) If the sum of squares is less than FLT_MIN then\n" |
| 37333 | " * the implementation may return back p.\n" |
| 37334 | " * 3) If the device is in \"denorms are flushed to zero\"\n" |
| 37335 | " * mode, individual operand elements with magnitude\n" |
| 37336 | " * less than sqrt(FLT_MIN) may be flushed to zero\n" |
| 37337 | " * before proceeding with the calculation.\n" |
| 37338 | " */\n" |
| 37339 | "float __ovld __cnfn fast_normalize(float p);\n" |
| 37340 | "float2 __ovld __cnfn fast_normalize(float2 p);\n" |
| 37341 | "float3 __ovld __cnfn fast_normalize(float3 p);\n" |
| 37342 | "float4 __ovld __cnfn fast_normalize(float4 p);\n" |
| 37343 | "#ifdef cl_khr_fp16\n" |
| 37344 | "half __ovld __cnfn fast_normalize(half p);\n" |
| 37345 | "half2 __ovld __cnfn fast_normalize(half2 p);\n" |
| 37346 | "half3 __ovld __cnfn fast_normalize(half3 p);\n" |
| 37347 | "half4 __ovld __cnfn fast_normalize(half4 p);\n" |
| 37348 | "#endif //cl_khr_fp16\n" |
| 37349 | "\n" |
| 37350 | "// OpenCL v1.1 s6.11.6, v1.2 s6.12.6, v2.0 s6.13.6 - Relational Functions\n" |
| 37351 | "\n" |
| 37352 | "/**\n" |
| 37353 | " * intn isequal (floatn x, floatn y)\n" |
| 37354 | " * Returns the component-wise compare of x == y.\n" |
| 37355 | " */\n" |
| 37356 | "int __ovld __cnfn isequal(float x, float y);\n" |
| 37357 | "int2 __ovld __cnfn isequal(float2 x, float2 y);\n" |
| 37358 | "int3 __ovld __cnfn isequal(float3 x, float3 y);\n" |
| 37359 | "int4 __ovld __cnfn isequal(float4 x, float4 y);\n" |
| 37360 | "int8 __ovld __cnfn isequal(float8 x, float8 y);\n" |
| 37361 | "int16 __ovld __cnfn isequal(float16 x, float16 y);\n" |
| 37362 | "#ifdef cl_khr_fp64\n" |
| 37363 | "int __ovld __cnfn isequal(double x, double y);\n" |
| 37364 | "long2 __ovld __cnfn isequal(double2 x, double2 y);\n" |
| 37365 | "long3 __ovld __cnfn isequal(double3 x, double3 y);\n" |
| 37366 | "long4 __ovld __cnfn isequal(double4 x, double4 y);\n" |
| 37367 | "long8 __ovld __cnfn isequal(double8 x, double8 y);\n" |
| 37368 | "long16 __ovld __cnfn isequal(double16 x, double16 y);\n" |
| 37369 | "#endif //cl_khr_fp64\n" |
| 37370 | "#ifdef cl_khr_fp16\n" |
| 37371 | "int __ovld __cnfn isequal(half x, half y);\n" |
| 37372 | "short2 __ovld __cnfn isequal(half2 x, half2 y);\n" |
| 37373 | "short3 __ovld __cnfn isequal(half3 x, half3 y);\n" |
| 37374 | "short4 __ovld __cnfn isequal(half4 x, half4 y);\n" |
| 37375 | "short8 __ovld __cnfn isequal(half8 x, half8 y);\n" |
| 37376 | "short16 __ovld __cnfn isequal(half16 x, half16 y);\n" |
| 37377 | "#endif //cl_khr_fp16\n" |
| 37378 | "\n" |
| 37379 | "/**\n" |
| 37380 | " * Returns the component-wise compare of x != y.\n" |
| 37381 | " */\n" |
| 37382 | "int __ovld __cnfn isnotequal(float x, float y);\n" |
| 37383 | "int2 __ovld __cnfn isnotequal(float2 x, float2 y);\n" |
| 37384 | "int3 __ovld __cnfn isnotequal(float3 x, float3 y);\n" |
| 37385 | "int4 __ovld __cnfn isnotequal(float4 x, float4 y);\n" |
| 37386 | "int8 __ovld __cnfn isnotequal(float8 x, float8 y);\n" |
| 37387 | "int16 __ovld __cnfn isnotequal(float16 x, float16 y);\n" |
| 37388 | "#ifdef cl_khr_fp64\n" |
| 37389 | "int __ovld __cnfn isnotequal(double x, double y);\n" |
| 37390 | "long2 __ovld __cnfn isnotequal(double2 x, double2 y);\n" |
| 37391 | "long3 __ovld __cnfn isnotequal(double3 x, double3 y);\n" |
| 37392 | "long4 __ovld __cnfn isnotequal(double4 x, double4 y);\n" |
| 37393 | "long8 __ovld __cnfn isnotequal(double8 x, double8 y);\n" |
| 37394 | "long16 __ovld __cnfn isnotequal(double16 x, double16 y);\n" |
| 37395 | "#endif //cl_khr_fp64\n" |
| 37396 | "#ifdef cl_khr_fp16\n" |
| 37397 | "int __ovld __cnfn isnotequal(half x, half y);\n" |
| 37398 | "short2 __ovld __cnfn isnotequal(half2 x, half2 y);\n" |
| 37399 | "short3 __ovld __cnfn isnotequal(half3 x, half3 y);\n" |
| 37400 | "short4 __ovld __cnfn isnotequal(half4 x, half4 y);\n" |
| 37401 | "short8 __ovld __cnfn isnotequal(half8 x, half8 y);\n" |
| 37402 | "short16 __ovld __cnfn isnotequal(half16 x, half16 y);\n" |
| 37403 | "#endif //cl_khr_fp16\n" |
| 37404 | "\n" |
| 37405 | "/**\n" |
| 37406 | " * Returns the component-wise compare of x > y.\n" |
| 37407 | " */\n" |
| 37408 | "int __ovld __cnfn isgreater(float x, float y);\n" |
| 37409 | "int2 __ovld __cnfn isgreater(float2 x, float2 y);\n" |
| 37410 | "int3 __ovld __cnfn isgreater(float3 x, float3 y);\n" |
| 37411 | "int4 __ovld __cnfn isgreater(float4 x, float4 y);\n" |
| 37412 | "int8 __ovld __cnfn isgreater(float8 x, float8 y);\n" |
| 37413 | "int16 __ovld __cnfn isgreater(float16 x, float16 y);\n" |
| 37414 | "#ifdef cl_khr_fp64\n" |
| 37415 | "int __ovld __cnfn isgreater(double x, double y);\n" |
| 37416 | "long2 __ovld __cnfn isgreater(double2 x, double2 y);\n" |
| 37417 | "long3 __ovld __cnfn isgreater(double3 x, double3 y);\n" |
| 37418 | "long4 __ovld __cnfn isgreater(double4 x, double4 y);\n" |
| 37419 | "long8 __ovld __cnfn isgreater(double8 x, double8 y);\n" |
| 37420 | "long16 __ovld __cnfn isgreater(double16 x, double16 y);\n" |
| 37421 | "#endif //cl_khr_fp64\n" |
| 37422 | "#ifdef cl_khr_fp16\n" |
| 37423 | "int __ovld __cnfn isgreater(half x, half y);\n" |
| 37424 | "short2 __ovld __cnfn isgreater(half2 x, half2 y);\n" |
| 37425 | "short3 __ovld __cnfn isgreater(half3 x, half3 y);\n" |
| 37426 | "short4 __ovld __cnfn isgreater(half4 x, half4 y);\n" |
| 37427 | "short8 __ovld __cnfn isgreater(half8 x, half8 y);\n" |
| 37428 | "short16 __ovld __cnfn isgreater(half16 x, half16 y);\n" |
| 37429 | "#endif //cl_khr_fp16\n" |
| 37430 | "\n" |
| 37431 | "/**\n" |
| 37432 | " * Returns the component-wise compare of x >= y.\n" |
| 37433 | " */\n" |
| 37434 | "int __ovld __cnfn isgreaterequal(float x, float y);\n" |
| 37435 | "int2 __ovld __cnfn isgreaterequal(float2 x, float2 y);\n" |
| 37436 | "int3 __ovld __cnfn isgreaterequal(float3 x, float3 y);\n" |
| 37437 | "int4 __ovld __cnfn isgreaterequal(float4 x, float4 y);\n" |
| 37438 | "int8 __ovld __cnfn isgreaterequal(float8 x, float8 y);\n" |
| 37439 | "int16 __ovld __cnfn isgreaterequal(float16 x, float16 y);\n" |
| 37440 | "#ifdef cl_khr_fp64\n" |
| 37441 | "int __ovld __cnfn isgreaterequal(double x, double y);\n" |
| 37442 | "long2 __ovld __cnfn isgreaterequal(double2 x, double2 y);\n" |
| 37443 | "long3 __ovld __cnfn isgreaterequal(double3 x, double3 y);\n" |
| 37444 | "long4 __ovld __cnfn isgreaterequal(double4 x, double4 y);\n" |
| 37445 | "long8 __ovld __cnfn isgreaterequal(double8 x, double8 y);\n" |
| 37446 | "long16 __ovld __cnfn isgreaterequal(double16 x, double16 y);\n" |
| 37447 | "#endif //cl_khr_fp64\n" |
| 37448 | "#ifdef cl_khr_fp16\n" |
| 37449 | "int __ovld __cnfn isgreaterequal(half x, half y);\n" |
| 37450 | "short2 __ovld __cnfn isgreaterequal(half2 x, half2 y);\n" |
| 37451 | "short3 __ovld __cnfn isgreaterequal(half3 x, half3 y);\n" |
| 37452 | "short4 __ovld __cnfn isgreaterequal(half4 x, half4 y);\n" |
| 37453 | "short8 __ovld __cnfn isgreaterequal(half8 x, half8 y);\n" |
| 37454 | "short16 __ovld __cnfn isgreaterequal(half16 x, half16 y);\n" |
| 37455 | "#endif //cl_khr_fp16\n" |
| 37456 | "\n" |
| 37457 | "/**\n" |
| 37458 | " * Returns the component-wise compare of x < y.\n" |
| 37459 | " */\n" |
| 37460 | "int __ovld __cnfn isless(float x, float y);\n" |
| 37461 | "int2 __ovld __cnfn isless(float2 x, float2 y);\n" |
| 37462 | "int3 __ovld __cnfn isless(float3 x, float3 y);\n" |
| 37463 | "int4 __ovld __cnfn isless(float4 x, float4 y);\n" |
| 37464 | "int8 __ovld __cnfn isless(float8 x, float8 y);\n" |
| 37465 | "int16 __ovld __cnfn isless(float16 x, float16 y);\n" |
| 37466 | "#ifdef cl_khr_fp64\n" |
| 37467 | "int __ovld __cnfn isless(double x, double y);\n" |
| 37468 | "long2 __ovld __cnfn isless(double2 x, double2 y);\n" |
| 37469 | "long3 __ovld __cnfn isless(double3 x, double3 y);\n" |
| 37470 | "long4 __ovld __cnfn isless(double4 x, double4 y);\n" |
| 37471 | "long8 __ovld __cnfn isless(double8 x, double8 y);\n" |
| 37472 | "long16 __ovld __cnfn isless(double16 x, double16 y);\n" |
| 37473 | "#endif //cl_khr_fp64\n" |
| 37474 | "#ifdef cl_khr_fp16\n" |
| 37475 | "int __ovld __cnfn isless(half x, half y);\n" |
| 37476 | "short2 __ovld __cnfn isless(half2 x, half2 y);\n" |
| 37477 | "short3 __ovld __cnfn isless(half3 x, half3 y);\n" |
| 37478 | "short4 __ovld __cnfn isless(half4 x, half4 y);\n" |
| 37479 | "short8 __ovld __cnfn isless(half8 x, half8 y);\n" |
| 37480 | "short16 __ovld __cnfn isless(half16 x, half16 y);\n" |
| 37481 | "#endif //cl_khr_fp16\n" |
| 37482 | "\n" |
| 37483 | "/**\n" |
| 37484 | " * Returns the component-wise compare of x <= y.\n" |
| 37485 | " */\n" |
| 37486 | "int __ovld __cnfn islessequal(float x, float y);\n" |
| 37487 | "int2 __ovld __cnfn islessequal(float2 x, float2 y);\n" |
| 37488 | "int3 __ovld __cnfn islessequal(float3 x, float3 y);\n" |
| 37489 | "int4 __ovld __cnfn islessequal(float4 x, float4 y);\n" |
| 37490 | "int8 __ovld __cnfn islessequal(float8 x, float8 y);\n" |
| 37491 | "int16 __ovld __cnfn islessequal(float16 x, float16 y);\n" |
| 37492 | "#ifdef cl_khr_fp64\n" |
| 37493 | "int __ovld __cnfn islessequal(double x, double y);\n" |
| 37494 | "long2 __ovld __cnfn islessequal(double2 x, double2 y);\n" |
| 37495 | "long3 __ovld __cnfn islessequal(double3 x, double3 y);\n" |
| 37496 | "long4 __ovld __cnfn islessequal(double4 x, double4 y);\n" |
| 37497 | "long8 __ovld __cnfn islessequal(double8 x, double8 y);\n" |
| 37498 | "long16 __ovld __cnfn islessequal(double16 x, double16 y);\n" |
| 37499 | "#endif //cl_khr_fp64\n" |
| 37500 | "#ifdef cl_khr_fp16\n" |
| 37501 | "int __ovld __cnfn islessequal(half x, half y);\n" |
| 37502 | "short2 __ovld __cnfn islessequal(half2 x, half2 y);\n" |
| 37503 | "short3 __ovld __cnfn islessequal(half3 x, half3 y);\n" |
| 37504 | "short4 __ovld __cnfn islessequal(half4 x, half4 y);\n" |
| 37505 | "short8 __ovld __cnfn islessequal(half8 x, half8 y);\n" |
| 37506 | "short16 __ovld __cnfn islessequal(half16 x, half16 y);\n" |
| 37507 | "#endif //cl_khr_fp16\n" |
| 37508 | "\n" |
| 37509 | "/**\n" |
| 37510 | " * Returns the component-wise compare of\n" |
| 37511 | " * (x < y) || (x > y) .\n" |
| 37512 | " */\n" |
| 37513 | "int __ovld __cnfn islessgreater(float x, float y);\n" |
| 37514 | "int2 __ovld __cnfn islessgreater(float2 x, float2 y);\n" |
| 37515 | "int3 __ovld __cnfn islessgreater(float3 x, float3 y);\n" |
| 37516 | "int4 __ovld __cnfn islessgreater(float4 x, float4 y);\n" |
| 37517 | "int8 __ovld __cnfn islessgreater(float8 x, float8 y);\n" |
| 37518 | "int16 __ovld __cnfn islessgreater(float16 x, float16 y);\n" |
| 37519 | "#ifdef cl_khr_fp64\n" |
| 37520 | "int __ovld __cnfn islessgreater(double x, double y);\n" |
| 37521 | "long2 __ovld __cnfn islessgreater(double2 x, double2 y);\n" |
| 37522 | "long3 __ovld __cnfn islessgreater(double3 x, double3 y);\n" |
| 37523 | "long4 __ovld __cnfn islessgreater(double4 x, double4 y);\n" |
| 37524 | "long8 __ovld __cnfn islessgreater(double8 x, double8 y);\n" |
| 37525 | "long16 __ovld __cnfn islessgreater(double16 x, double16 y);\n" |
| 37526 | "#endif //cl_khr_fp64\n" |
| 37527 | "#ifdef cl_khr_fp16\n" |
| 37528 | "int __ovld __cnfn islessgreater(half x, half y);\n" |
| 37529 | "short2 __ovld __cnfn islessgreater(half2 x, half2 y);\n" |
| 37530 | "short3 __ovld __cnfn islessgreater(half3 x, half3 y);\n" |
| 37531 | "short4 __ovld __cnfn islessgreater(half4 x, half4 y);\n" |
| 37532 | "short8 __ovld __cnfn islessgreater(half8 x, half8 y);\n" |
| 37533 | "short16 __ovld __cnfn islessgreater(half16 x, half16 y);\n" |
| 37534 | "#endif //cl_khr_fp16\n" |
| 37535 | "\n" |
| 37536 | "/**\n" |
| 37537 | " * Test for finite value.\n" |
| 37538 | " */\n" |
| 37539 | "int __ovld __cnfn isfinite(float);\n" |
| 37540 | "int2 __ovld __cnfn isfinite(float2);\n" |
| 37541 | "int3 __ovld __cnfn isfinite(float3);\n" |
| 37542 | "int4 __ovld __cnfn isfinite(float4);\n" |
| 37543 | "int8 __ovld __cnfn isfinite(float8);\n" |
| 37544 | "int16 __ovld __cnfn isfinite(float16);\n" |
| 37545 | "#ifdef cl_khr_fp64\n" |
| 37546 | "int __ovld __cnfn isfinite(double);\n" |
| 37547 | "long2 __ovld __cnfn isfinite(double2);\n" |
| 37548 | "long3 __ovld __cnfn isfinite(double3);\n" |
| 37549 | "long4 __ovld __cnfn isfinite(double4);\n" |
| 37550 | "long8 __ovld __cnfn isfinite(double8);\n" |
| 37551 | "long16 __ovld __cnfn isfinite(double16);\n" |
| 37552 | "#endif //cl_khr_fp64\n" |
| 37553 | "#ifdef cl_khr_fp16\n" |
| 37554 | "int __ovld __cnfn isfinite(half);\n" |
| 37555 | "short2 __ovld __cnfn isfinite(half2);\n" |
| 37556 | "short3 __ovld __cnfn isfinite(half3);\n" |
| 37557 | "short4 __ovld __cnfn isfinite(half4);\n" |
| 37558 | "short8 __ovld __cnfn isfinite(half8);\n" |
| 37559 | "short16 __ovld __cnfn isfinite(half16);\n" |
| 37560 | "#endif //cl_khr_fp16\n" |
| 37561 | "\n" |
| 37562 | "/**\n" |
| 37563 | " * Test for infinity value (+ve or -ve) .\n" |
| 37564 | " */\n" |
| 37565 | "int __ovld __cnfn isinf(float);\n" |
| 37566 | "int2 __ovld __cnfn isinf(float2);\n" |
| 37567 | "int3 __ovld __cnfn isinf(float3);\n" |
| 37568 | "int4 __ovld __cnfn isinf(float4);\n" |
| 37569 | "int8 __ovld __cnfn isinf(float8);\n" |
| 37570 | "int16 __ovld __cnfn isinf(float16);\n" |
| 37571 | "#ifdef cl_khr_fp64\n" |
| 37572 | "int __ovld __cnfn isinf(double);\n" |
| 37573 | "long2 __ovld __cnfn isinf(double2);\n" |
| 37574 | "long3 __ovld __cnfn isinf(double3);\n" |
| 37575 | "long4 __ovld __cnfn isinf(double4);\n" |
| 37576 | "long8 __ovld __cnfn isinf(double8);\n" |
| 37577 | "long16 __ovld __cnfn isinf(double16);\n" |
| 37578 | "#endif //cl_khr_fp64\n" |
| 37579 | "#ifdef cl_khr_fp16\n" |
| 37580 | "int __ovld __cnfn isinf(half);\n" |
| 37581 | "short2 __ovld __cnfn isinf(half2);\n" |
| 37582 | "short3 __ovld __cnfn isinf(half3);\n" |
| 37583 | "short4 __ovld __cnfn isinf(half4);\n" |
| 37584 | "short8 __ovld __cnfn isinf(half8);\n" |
| 37585 | "short16 __ovld __cnfn isinf(half16);\n" |
| 37586 | "#endif //cl_khr_fp16\n" |
| 37587 | "\n" |
| 37588 | "/**\n" |
| 37589 | " * Test for a NaN.\n" |
| 37590 | " */\n" |
| 37591 | "int __ovld __cnfn isnan(float);\n" |
| 37592 | "int2 __ovld __cnfn isnan(float2);\n" |
| 37593 | "int3 __ovld __cnfn isnan(float3);\n" |
| 37594 | "int4 __ovld __cnfn isnan(float4);\n" |
| 37595 | "int8 __ovld __cnfn isnan(float8);\n" |
| 37596 | "int16 __ovld __cnfn isnan(float16);\n" |
| 37597 | "#ifdef cl_khr_fp64\n" |
| 37598 | "int __ovld __cnfn isnan(double);\n" |
| 37599 | "long2 __ovld __cnfn isnan(double2);\n" |
| 37600 | "long3 __ovld __cnfn isnan(double3);\n" |
| 37601 | "long4 __ovld __cnfn isnan(double4);\n" |
| 37602 | "long8 __ovld __cnfn isnan(double8);\n" |
| 37603 | "long16 __ovld __cnfn isnan(double16);\n" |
| 37604 | "#endif //cl_khr_fp64\n" |
| 37605 | "#ifdef cl_khr_fp16\n" |
| 37606 | "int __ovld __cnfn isnan(half);\n" |
| 37607 | "short2 __ovld __cnfn isnan(half2);\n" |
| 37608 | "short3 __ovld __cnfn isnan(half3);\n" |
| 37609 | "short4 __ovld __cnfn isnan(half4);\n" |
| 37610 | "short8 __ovld __cnfn isnan(half8);\n" |
| 37611 | "short16 __ovld __cnfn isnan(half16);\n" |
| 37612 | "#endif //cl_khr_fp16\n" |
| 37613 | "\n" |
| 37614 | "/**\n" |
| 37615 | " * Test for a normal value.\n" |
| 37616 | " */\n" |
| 37617 | "int __ovld __cnfn isnormal(float);\n" |
| 37618 | "int2 __ovld __cnfn isnormal(float2);\n" |
| 37619 | "int3 __ovld __cnfn isnormal(float3);\n" |
| 37620 | "int4 __ovld __cnfn isnormal(float4);\n" |
| 37621 | "int8 __ovld __cnfn isnormal(float8);\n" |
| 37622 | "int16 __ovld __cnfn isnormal(float16);\n" |
| 37623 | "#ifdef cl_khr_fp64\n" |
| 37624 | "int __ovld __cnfn isnormal(double);\n" |
| 37625 | "long2 __ovld __cnfn isnormal(double2);\n" |
| 37626 | "long3 __ovld __cnfn isnormal(double3);\n" |
| 37627 | "long4 __ovld __cnfn isnormal(double4);\n" |
| 37628 | "long8 __ovld __cnfn isnormal(double8);\n" |
| 37629 | "long16 __ovld __cnfn isnormal(double16);\n" |
| 37630 | "#endif //cl_khr_fp64\n" |
| 37631 | "#ifdef cl_khr_fp16\n" |
| 37632 | "int __ovld __cnfn isnormal(half);\n" |
| 37633 | "short2 __ovld __cnfn isnormal(half2);\n" |
| 37634 | "short3 __ovld __cnfn isnormal(half3);\n" |
| 37635 | "short4 __ovld __cnfn isnormal(half4);\n" |
| 37636 | "short8 __ovld __cnfn isnormal(half8);\n" |
| 37637 | "short16 __ovld __cnfn isnormal(half16);\n" |
| 37638 | "#endif //cl_khr_fp16\n" |
| 37639 | "\n" |
| 37640 | "/**\n" |
| 37641 | " * Test if arguments are ordered. isordered() takes\n" |
| 37642 | " * arguments x and y, and returns the result\n" |
| 37643 | " * isequal(x, x) && isequal(y, y).\n" |
| 37644 | " */\n" |
| 37645 | "int __ovld __cnfn isordered(float x, float y);\n" |
| 37646 | "int2 __ovld __cnfn isordered(float2 x, float2 y);\n" |
| 37647 | "int3 __ovld __cnfn isordered(float3 x, float3 y);\n" |
| 37648 | "int4 __ovld __cnfn isordered(float4 x, float4 y);\n" |
| 37649 | "int8 __ovld __cnfn isordered(float8 x, float8 y);\n" |
| 37650 | "int16 __ovld __cnfn isordered(float16 x, float16 y);\n" |
| 37651 | "#ifdef cl_khr_fp64\n" |
| 37652 | "int __ovld __cnfn isordered(double x, double y);\n" |
| 37653 | "long2 __ovld __cnfn isordered(double2 x, double2 y);\n" |
| 37654 | "long3 __ovld __cnfn isordered(double3 x, double3 y);\n" |
| 37655 | "long4 __ovld __cnfn isordered(double4 x, double4 y);\n" |
| 37656 | "long8 __ovld __cnfn isordered(double8 x, double8 y);\n" |
| 37657 | "long16 __ovld __cnfn isordered(double16 x, double16 y);\n" |
| 37658 | "#endif //cl_khr_fp64\n" |
| 37659 | "#ifdef cl_khr_fp16\n" |
| 37660 | "int __ovld __cnfn isordered(half x, half y);\n" |
| 37661 | "short2 __ovld __cnfn isordered(half2 x, half2 y);\n" |
| 37662 | "short3 __ovld __cnfn isordered(half3 x, half3 y);\n" |
| 37663 | "short4 __ovld __cnfn isordered(half4 x, half4 y);\n" |
| 37664 | "short8 __ovld __cnfn isordered(half8 x, half8 y);\n" |
| 37665 | "short16 __ovld __cnfn isordered(half16 x, half16 y);\n" |
| 37666 | "#endif //cl_khr_fp16\n" |
| 37667 | "\n" |
| 37668 | "/**\n" |
| 37669 | " * Test if arguments are unordered. isunordered()\n" |
| 37670 | " * takes arguments x and y, returning non-zero if x or y\n" |
| 37671 | " * is NaN, and zero otherwise.\n" |
| 37672 | " */\n" |
| 37673 | "int __ovld __cnfn isunordered(float x, float y);\n" |
| 37674 | "int2 __ovld __cnfn isunordered(float2 x, float2 y);\n" |
| 37675 | "int3 __ovld __cnfn isunordered(float3 x, float3 y);\n" |
| 37676 | "int4 __ovld __cnfn isunordered(float4 x, float4 y);\n" |
| 37677 | "int8 __ovld __cnfn isunordered(float8 x, float8 y);\n" |
| 37678 | "int16 __ovld __cnfn isunordered(float16 x, float16 y);\n" |
| 37679 | "#ifdef cl_khr_fp64\n" |
| 37680 | "int __ovld __cnfn isunordered(double x, double y);\n" |
| 37681 | "long2 __ovld __cnfn isunordered(double2 x, double2 y);\n" |
| 37682 | "long3 __ovld __cnfn isunordered(double3 x, double3 y);\n" |
| 37683 | "long4 __ovld __cnfn isunordered(double4 x, double4 y);\n" |
| 37684 | "long8 __ovld __cnfn isunordered(double8 x, double8 y);\n" |
| 37685 | "long16 __ovld __cnfn isunordered(double16 x, double16 y);\n" |
| 37686 | "#endif //cl_khr_fp64\n" |
| 37687 | "#ifdef cl_khr_fp16\n" |
| 37688 | "int __ovld __cnfn isunordered(half x, half y);\n" |
| 37689 | "short2 __ovld __cnfn isunordered(half2 x, half2 y);\n" |
| 37690 | "short3 __ovld __cnfn isunordered(half3 x, half3 y);\n" |
| 37691 | "short4 __ovld __cnfn isunordered(half4 x, half4 y);\n" |
| 37692 | "short8 __ovld __cnfn isunordered(half8 x, half8 y);\n" |
| 37693 | "short16 __ovld __cnfn isunordered(half16 x, half16 y);\n" |
| 37694 | "#endif //cl_khr_fp16\n" |
| 37695 | "\n" |
| 37696 | "/**\n" |
| 37697 | " * Test for sign bit. The scalar version of the function\n" |
| 37698 | " * returns a 1 if the sign bit in the float is set else returns\n" |
| 37699 | " * 0. The vector version of the function returns the\n" |
| 37700 | " * following for each component in floatn: a -1 if the\n" |
| 37701 | " * sign bit in the float is set else returns 0.\n" |
| 37702 | " */\n" |
| 37703 | "int __ovld __cnfn signbit(float);\n" |
| 37704 | "int2 __ovld __cnfn signbit(float2);\n" |
| 37705 | "int3 __ovld __cnfn signbit(float3);\n" |
| 37706 | "int4 __ovld __cnfn signbit(float4);\n" |
| 37707 | "int8 __ovld __cnfn signbit(float8);\n" |
| 37708 | "int16 __ovld __cnfn signbit(float16);\n" |
| 37709 | "#ifdef cl_khr_fp64\n" |
| 37710 | "int __ovld __cnfn signbit(double);\n" |
| 37711 | "long2 __ovld __cnfn signbit(double2);\n" |
| 37712 | "long3 __ovld __cnfn signbit(double3);\n" |
| 37713 | "long4 __ovld __cnfn signbit(double4);\n" |
| 37714 | "long8 __ovld __cnfn signbit(double8);\n" |
| 37715 | "long16 __ovld __cnfn signbit(double16);\n" |
| 37716 | "#endif //cl_khr_fp64\n" |
| 37717 | "#ifdef cl_khr_fp16\n" |
| 37718 | "int __ovld __cnfn signbit(half);\n" |
| 37719 | "short2 __ovld __cnfn signbit(half2);\n" |
| 37720 | "short3 __ovld __cnfn signbit(half3);\n" |
| 37721 | "short4 __ovld __cnfn signbit(half4);\n" |
| 37722 | "short8 __ovld __cnfn signbit(half8);\n" |
| 37723 | "short16 __ovld __cnfn signbit(half16);\n" |
| 37724 | "#endif //cl_khr_fp16\n" |
| 37725 | "\n" |
| 37726 | "/**\n" |
| 37727 | " * Returns 1 if the most significant bit in any component\n" |
| 37728 | " * of x is set; otherwise returns 0.\n" |
| 37729 | " */\n" |
| 37730 | "int __ovld __cnfn any(char x);\n" |
| 37731 | "int __ovld __cnfn any(char2 x);\n" |
| 37732 | "int __ovld __cnfn any(char3 x);\n" |
| 37733 | "int __ovld __cnfn any(char4 x);\n" |
| 37734 | "int __ovld __cnfn any(char8 x);\n" |
| 37735 | "int __ovld __cnfn any(char16 x);\n" |
| 37736 | "int __ovld __cnfn any(short x);\n" |
| 37737 | "int __ovld __cnfn any(short2 x);\n" |
| 37738 | "int __ovld __cnfn any(short3 x);\n" |
| 37739 | "int __ovld __cnfn any(short4 x);\n" |
| 37740 | "int __ovld __cnfn any(short8 x);\n" |
| 37741 | "int __ovld __cnfn any(short16 x);\n" |
| 37742 | "int __ovld __cnfn any(int x);\n" |
| 37743 | "int __ovld __cnfn any(int2 x);\n" |
| 37744 | "int __ovld __cnfn any(int3 x);\n" |
| 37745 | "int __ovld __cnfn any(int4 x);\n" |
| 37746 | "int __ovld __cnfn any(int8 x);\n" |
| 37747 | "int __ovld __cnfn any(int16 x);\n" |
| 37748 | "int __ovld __cnfn any(long x);\n" |
| 37749 | "int __ovld __cnfn any(long2 x);\n" |
| 37750 | "int __ovld __cnfn any(long3 x);\n" |
| 37751 | "int __ovld __cnfn any(long4 x);\n" |
| 37752 | "int __ovld __cnfn any(long8 x);\n" |
| 37753 | "int __ovld __cnfn any(long16 x);\n" |
| 37754 | "\n" |
| 37755 | "/**\n" |
| 37756 | " * Returns 1 if the most significant bit in all components\n" |
| 37757 | " * of x is set; otherwise returns 0.\n" |
| 37758 | " */\n" |
| 37759 | "int __ovld __cnfn all(char x);\n" |
| 37760 | "int __ovld __cnfn all(char2 x);\n" |
| 37761 | "int __ovld __cnfn all(char3 x);\n" |
| 37762 | "int __ovld __cnfn all(char4 x);\n" |
| 37763 | "int __ovld __cnfn all(char8 x);\n" |
| 37764 | "int __ovld __cnfn all(char16 x);\n" |
| 37765 | "int __ovld __cnfn all(short x);\n" |
| 37766 | "int __ovld __cnfn all(short2 x);\n" |
| 37767 | "int __ovld __cnfn all(short3 x);\n" |
| 37768 | "int __ovld __cnfn all(short4 x);\n" |
| 37769 | "int __ovld __cnfn all(short8 x);\n" |
| 37770 | "int __ovld __cnfn all(short16 x);\n" |
| 37771 | "int __ovld __cnfn all(int x);\n" |
| 37772 | "int __ovld __cnfn all(int2 x);\n" |
| 37773 | "int __ovld __cnfn all(int3 x);\n" |
| 37774 | "int __ovld __cnfn all(int4 x);\n" |
| 37775 | "int __ovld __cnfn all(int8 x);\n" |
| 37776 | "int __ovld __cnfn all(int16 x);\n" |
| 37777 | "int __ovld __cnfn all(long x);\n" |
| 37778 | "int __ovld __cnfn all(long2 x);\n" |
| 37779 | "int __ovld __cnfn all(long3 x);\n" |
| 37780 | "int __ovld __cnfn all(long4 x);\n" |
| 37781 | "int __ovld __cnfn all(long8 x);\n" |
| 37782 | "int __ovld __cnfn all(long16 x);\n" |
| 37783 | "\n" |
| 37784 | "/**\n" |
| 37785 | " * Each bit of the result is the corresponding bit of a if\n" |
| 37786 | " * the corresponding bit of c is 0. Otherwise it is the\n" |
| 37787 | " * corresponding bit of b.\n" |
| 37788 | " */\n" |
| 37789 | "char __ovld __cnfn bitselect(char a, char b, char c);\n" |
| 37790 | "uchar __ovld __cnfn bitselect(uchar a, uchar b, uchar c);\n" |
| 37791 | "char2 __ovld __cnfn bitselect(char2 a, char2 b, char2 c);\n" |
| 37792 | "uchar2 __ovld __cnfn bitselect(uchar2 a, uchar2 b, uchar2 c);\n" |
| 37793 | "char3 __ovld __cnfn bitselect(char3 a, char3 b, char3 c);\n" |
| 37794 | "uchar3 __ovld __cnfn bitselect(uchar3 a, uchar3 b, uchar3 c);\n" |
| 37795 | "char4 __ovld __cnfn bitselect(char4 a, char4 b, char4 c);\n" |
| 37796 | "uchar4 __ovld __cnfn bitselect(uchar4 a, uchar4 b, uchar4 c);\n" |
| 37797 | "char8 __ovld __cnfn bitselect(char8 a, char8 b, char8 c);\n" |
| 37798 | "uchar8 __ovld __cnfn bitselect(uchar8 a, uchar8 b, uchar8 c);\n" |
| 37799 | "char16 __ovld __cnfn bitselect(char16 a, char16 b, char16 c);\n" |
| 37800 | "uchar16 __ovld __cnfn bitselect(uchar16 a, uchar16 b, uchar16 c);\n" |
| 37801 | "short __ovld __cnfn bitselect(short a, short b, short c);\n" |
| 37802 | "ushort __ovld __cnfn bitselect(ushort a, ushort b, ushort c);\n" |
| 37803 | "short2 __ovld __cnfn bitselect(short2 a, short2 b, short2 c);\n" |
| 37804 | "ushort2 __ovld __cnfn bitselect(ushort2 a, ushort2 b, ushort2 c);\n" |
| 37805 | "short3 __ovld __cnfn bitselect(short3 a, short3 b, short3 c);\n" |
| 37806 | "ushort3 __ovld __cnfn bitselect(ushort3 a, ushort3 b, ushort3 c);\n" |
| 37807 | "short4 __ovld __cnfn bitselect(short4 a, short4 b, short4 c);\n" |
| 37808 | "ushort4 __ovld __cnfn bitselect(ushort4 a, ushort4 b, ushort4 c);\n" |
| 37809 | "short8 __ovld __cnfn bitselect(short8 a, short8 b, short8 c);\n" |
| 37810 | "ushort8 __ovld __cnfn bitselect(ushort8 a, ushort8 b, ushort8 c);\n" |
| 37811 | "short16 __ovld __cnfn bitselect(short16 a, short16 b, short16 c);\n" |
| 37812 | "ushort16 __ovld __cnfn bitselect(ushort16 a, ushort16 b, ushort16 c);\n" |
| 37813 | "int __ovld __cnfn bitselect(int a, int b, int c);\n" |
| 37814 | "uint __ovld __cnfn bitselect(uint a, uint b, uint c);\n" |
| 37815 | "int2 __ovld __cnfn bitselect(int2 a, int2 b, int2 c);\n" |
| 37816 | "uint2 __ovld __cnfn bitselect(uint2 a, uint2 b, uint2 c);\n" |
| 37817 | "int3 __ovld __cnfn bitselect(int3 a, int3 b, int3 c);\n" |
| 37818 | "uint3 __ovld __cnfn bitselect(uint3 a, uint3 b, uint3 c);\n" |
| 37819 | "int4 __ovld __cnfn bitselect(int4 a, int4 b, int4 c);\n" |
| 37820 | "uint4 __ovld __cnfn bitselect(uint4 a, uint4 b, uint4 c);\n" |
| 37821 | "int8 __ovld __cnfn bitselect(int8 a, int8 b, int8 c);\n" |
| 37822 | "uint8 __ovld __cnfn bitselect(uint8 a, uint8 b, uint8 c);\n" |
| 37823 | "int16 __ovld __cnfn bitselect(int16 a, int16 b, int16 c);\n" |
| 37824 | "uint16 __ovld __cnfn bitselect(uint16 a, uint16 b, uint16 c);\n" |
| 37825 | "long __ovld __cnfn bitselect(long a, long b, long c);\n" |
| 37826 | "ulong __ovld __cnfn bitselect(ulong a, ulong b, ulong c);\n" |
| 37827 | "long2 __ovld __cnfn bitselect(long2 a, long2 b, long2 c);\n" |
| 37828 | "ulong2 __ovld __cnfn bitselect(ulong2 a, ulong2 b, ulong2 c);\n" |
| 37829 | "long3 __ovld __cnfn bitselect(long3 a, long3 b, long3 c);\n" |
| 37830 | "ulong3 __ovld __cnfn bitselect(ulong3 a, ulong3 b, ulong3 c);\n" |
| 37831 | "long4 __ovld __cnfn bitselect(long4 a, long4 b, long4 c);\n" |
| 37832 | "ulong4 __ovld __cnfn bitselect(ulong4 a, ulong4 b, ulong4 c);\n" |
| 37833 | "long8 __ovld __cnfn bitselect(long8 a, long8 b, long8 c);\n" |
| 37834 | "ulong8 __ovld __cnfn bitselect(ulong8 a, ulong8 b, ulong8 c);\n" |
| 37835 | "long16 __ovld __cnfn bitselect(long16 a, long16 b, long16 c);\n" |
| 37836 | "ulong16 __ovld __cnfn bitselect(ulong16 a, ulong16 b, ulong16 c);\n" |
| 37837 | "float __ovld __cnfn bitselect(float a, float b, float c);\n" |
| 37838 | "float2 __ovld __cnfn bitselect(float2 a, float2 b, float2 c);\n" |
| 37839 | "float3 __ovld __cnfn bitselect(float3 a, float3 b, float3 c);\n" |
| 37840 | "float4 __ovld __cnfn bitselect(float4 a, float4 b, float4 c);\n" |
| 37841 | "float8 __ovld __cnfn bitselect(float8 a, float8 b, float8 c);\n" |
| 37842 | "float16 __ovld __cnfn bitselect(float16 a, float16 b, float16 c);\n" |
| 37843 | "#ifdef cl_khr_fp64\n" |
| 37844 | "double __ovld __cnfn bitselect(double a, double b, double c);\n" |
| 37845 | "double2 __ovld __cnfn bitselect(double2 a, double2 b, double2 c);\n" |
| 37846 | "double3 __ovld __cnfn bitselect(double3 a, double3 b, double3 c);\n" |
| 37847 | "double4 __ovld __cnfn bitselect(double4 a, double4 b, double4 c);\n" |
| 37848 | "double8 __ovld __cnfn bitselect(double8 a, double8 b, double8 c);\n" |
| 37849 | "double16 __ovld __cnfn bitselect(double16 a, double16 b, double16 c);\n" |
| 37850 | "#endif //cl_khr_fp64\n" |
| 37851 | "#ifdef cl_khr_fp16\n" |
| 37852 | "half __ovld __cnfn bitselect(half a, half b, half c);\n" |
| 37853 | "half2 __ovld __cnfn bitselect(half2 a, half2 b, half2 c);\n" |
| 37854 | "half3 __ovld __cnfn bitselect(half3 a, half3 b, half3 c);\n" |
| 37855 | "half4 __ovld __cnfn bitselect(half4 a, half4 b, half4 c);\n" |
| 37856 | "half8 __ovld __cnfn bitselect(half8 a, half8 b, half8 c);\n" |
| 37857 | "half16 __ovld __cnfn bitselect(half16 a, half16 b, half16 c);\n" |
| 37858 | "#endif //cl_khr_fp16\n" |
| 37859 | "\n" |
| 37860 | "/**\n" |
| 37861 | " * For each component of a vector type,\n" |
| 37862 | " * result[i] = if MSB of c[i] is set ? b[i] : a[i].\n" |
| 37863 | " * For a scalar type, result = c ? b : a.\n" |
| 37864 | " * b and a must have the same type.\n" |
| 37865 | " * c must have the same number of elements and bits as a.\n" |
| 37866 | " */\n" |
| 37867 | "char __ovld __cnfn select(char a, char b, char c);\n" |
| 37868 | "uchar __ovld __cnfn select(uchar a, uchar b, char c);\n" |
| 37869 | "char2 __ovld __cnfn select(char2 a, char2 b, char2 c);\n" |
| 37870 | "uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, char2 c);\n" |
| 37871 | "char3 __ovld __cnfn select(char3 a, char3 b, char3 c);\n" |
| 37872 | "uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, char3 c);\n" |
| 37873 | "char4 __ovld __cnfn select(char4 a, char4 b, char4 c);\n" |
| 37874 | "uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, char4 c);\n" |
| 37875 | "char8 __ovld __cnfn select(char8 a, char8 b, char8 c);\n" |
| 37876 | "uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, char8 c);\n" |
| 37877 | "char16 __ovld __cnfn select(char16 a, char16 b, char16 c);\n" |
| 37878 | "uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, char16 c);\n" |
| 37879 | "\n" |
| 37880 | "short __ovld __cnfn select(short a, short b, short c);\n" |
| 37881 | "ushort __ovld __cnfn select(ushort a, ushort b, short c);\n" |
| 37882 | "short2 __ovld __cnfn select(short2 a, short2 b, short2 c);\n" |
| 37883 | "ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, short2 c);\n" |
| 37884 | "short3 __ovld __cnfn select(short3 a, short3 b, short3 c);\n" |
| 37885 | "ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, short3 c);\n" |
| 37886 | "short4 __ovld __cnfn select(short4 a, short4 b, short4 c);\n" |
| 37887 | "ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, short4 c);\n" |
| 37888 | "short8 __ovld __cnfn select(short8 a, short8 b, short8 c);\n" |
| 37889 | "ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, short8 c);\n" |
| 37890 | "short16 __ovld __cnfn select(short16 a, short16 b, short16 c);\n" |
| 37891 | "ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, short16 c);\n" |
| 37892 | "\n" |
| 37893 | "int __ovld __cnfn select(int a, int b, int c);\n" |
| 37894 | "uint __ovld __cnfn select(uint a, uint b, int c);\n" |
| 37895 | "int2 __ovld __cnfn select(int2 a, int2 b, int2 c);\n" |
| 37896 | "uint2 __ovld __cnfn select(uint2 a, uint2 b, int2 c);\n" |
| 37897 | "int3 __ovld __cnfn select(int3 a, int3 b, int3 c);\n" |
| 37898 | "uint3 __ovld __cnfn select(uint3 a, uint3 b, int3 c);\n" |
| 37899 | "int4 __ovld __cnfn select(int4 a, int4 b, int4 c);\n" |
| 37900 | "uint4 __ovld __cnfn select(uint4 a, uint4 b, int4 c);\n" |
| 37901 | "int8 __ovld __cnfn select(int8 a, int8 b, int8 c);\n" |
| 37902 | "uint8 __ovld __cnfn select(uint8 a, uint8 b, int8 c);\n" |
| 37903 | "int16 __ovld __cnfn select(int16 a, int16 b, int16 c);\n" |
| 37904 | "uint16 __ovld __cnfn select(uint16 a, uint16 b, int16 c);\n" |
| 37905 | "float __ovld __cnfn select(float a, float b, int c);\n" |
| 37906 | "float2 __ovld __cnfn select(float2 a, float2 b, int2 c);\n" |
| 37907 | "float3 __ovld __cnfn select(float3 a, float3 b, int3 c);\n" |
| 37908 | "float4 __ovld __cnfn select(float4 a, float4 b, int4 c);\n" |
| 37909 | "float8 __ovld __cnfn select(float8 a, float8 b, int8 c);\n" |
| 37910 | "float16 __ovld __cnfn select(float16 a, float16 b, int16 c);\n" |
| 37911 | "\n" |
| 37912 | "long __ovld __cnfn select(long a, long b, long c);\n" |
| 37913 | "ulong __ovld __cnfn select(ulong a, ulong b, long c);\n" |
| 37914 | "long2 __ovld __cnfn select(long2 a, long2 b, long2 c);\n" |
| 37915 | "ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, long2 c);\n" |
| 37916 | "long3 __ovld __cnfn select(long3 a, long3 b, long3 c);\n" |
| 37917 | "ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, long3 c);\n" |
| 37918 | "long4 __ovld __cnfn select(long4 a, long4 b, long4 c);\n" |
| 37919 | "ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, long4 c);\n" |
| 37920 | "long8 __ovld __cnfn select(long8 a, long8 b, long8 c);\n" |
| 37921 | "ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, long8 c);\n" |
| 37922 | "long16 __ovld __cnfn select(long16 a, long16 b, long16 c);\n" |
| 37923 | "ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, long16 c);\n" |
| 37924 | "\n" |
| 37925 | "char __ovld __cnfn select(char a, char b, uchar c);\n" |
| 37926 | "uchar __ovld __cnfn select(uchar a, uchar b, uchar c);\n" |
| 37927 | "char2 __ovld __cnfn select(char2 a, char2 b, uchar2 c);\n" |
| 37928 | "uchar2 __ovld __cnfn select(uchar2 a, uchar2 b, uchar2 c);\n" |
| 37929 | "char3 __ovld __cnfn select(char3 a, char3 b, uchar3 c);\n" |
| 37930 | "uchar3 __ovld __cnfn select(uchar3 a, uchar3 b, uchar3 c);\n" |
| 37931 | "char4 __ovld __cnfn select(char4 a, char4 b, uchar4 c);\n" |
| 37932 | "uchar4 __ovld __cnfn select(uchar4 a, uchar4 b, uchar4 c);\n" |
| 37933 | "char8 __ovld __cnfn select(char8 a, char8 b, uchar8 c);\n" |
| 37934 | "uchar8 __ovld __cnfn select(uchar8 a, uchar8 b, uchar8 c);\n" |
| 37935 | "char16 __ovld __cnfn select(char16 a, char16 b, uchar16 c);\n" |
| 37936 | "uchar16 __ovld __cnfn select(uchar16 a, uchar16 b, uchar16 c);\n" |
| 37937 | "\n" |
| 37938 | "short __ovld __cnfn select(short a, short b, ushort c);\n" |
| 37939 | "ushort __ovld __cnfn select(ushort a, ushort b, ushort c);\n" |
| 37940 | "short2 __ovld __cnfn select(short2 a, short2 b, ushort2 c);\n" |
| 37941 | "ushort2 __ovld __cnfn select(ushort2 a, ushort2 b, ushort2 c);\n" |
| 37942 | "short3 __ovld __cnfn select(short3 a, short3 b, ushort3 c);\n" |
| 37943 | "ushort3 __ovld __cnfn select(ushort3 a, ushort3 b, ushort3 c);\n" |
| 37944 | "short4 __ovld __cnfn select(short4 a, short4 b, ushort4 c);\n" |
| 37945 | "ushort4 __ovld __cnfn select(ushort4 a, ushort4 b, ushort4 c);\n" |
| 37946 | "short8 __ovld __cnfn select(short8 a, short8 b, ushort8 c);\n" |
| 37947 | "ushort8 __ovld __cnfn select(ushort8 a, ushort8 b, ushort8 c);\n" |
| 37948 | "short16 __ovld __cnfn select(short16 a, short16 b, ushort16 c);\n" |
| 37949 | "ushort16 __ovld __cnfn select(ushort16 a, ushort16 b, ushort16 c);\n" |
| 37950 | "\n" |
| 37951 | "int __ovld __cnfn select(int a, int b, uint c);\n" |
| 37952 | "uint __ovld __cnfn select(uint a, uint b, uint c);\n" |
| 37953 | "int2 __ovld __cnfn select(int2 a, int2 b, uint2 c);\n" |
| 37954 | "uint2 __ovld __cnfn select(uint2 a, uint2 b, uint2 c);\n" |
| 37955 | "int3 __ovld __cnfn select(int3 a, int3 b, uint3 c);\n" |
| 37956 | "uint3 __ovld __cnfn select(uint3 a, uint3 b, uint3 c);\n" |
| 37957 | "int4 __ovld __cnfn select(int4 a, int4 b, uint4 c);\n" |
| 37958 | "uint4 __ovld __cnfn select(uint4 a, uint4 b, uint4 c);\n" |
| 37959 | "int8 __ovld __cnfn select(int8 a, int8 b, uint8 c);\n" |
| 37960 | "uint8 __ovld __cnfn select(uint8 a, uint8 b, uint8 c);\n" |
| 37961 | "int16 __ovld __cnfn select(int16 a, int16 b, uint16 c);\n" |
| 37962 | "uint16 __ovld __cnfn select(uint16 a, uint16 b, uint16 c);\n" |
| 37963 | "float __ovld __cnfn select(float a, float b, uint c);\n" |
| 37964 | "float2 __ovld __cnfn select(float2 a, float2 b, uint2 c);\n" |
| 37965 | "float3 __ovld __cnfn select(float3 a, float3 b, uint3 c);\n" |
| 37966 | "float4 __ovld __cnfn select(float4 a, float4 b, uint4 c);\n" |
| 37967 | "float8 __ovld __cnfn select(float8 a, float8 b, uint8 c);\n" |
| 37968 | "float16 __ovld __cnfn select(float16 a, float16 b, uint16 c);\n" |
| 37969 | "\n" |
| 37970 | "long __ovld __cnfn select(long a, long b, ulong c);\n" |
| 37971 | "ulong __ovld __cnfn select(ulong a, ulong b, ulong c);\n" |
| 37972 | "long2 __ovld __cnfn select(long2 a, long2 b, ulong2 c);\n" |
| 37973 | "ulong2 __ovld __cnfn select(ulong2 a, ulong2 b, ulong2 c);\n" |
| 37974 | "long3 __ovld __cnfn select(long3 a, long3 b, ulong3 c);\n" |
| 37975 | "ulong3 __ovld __cnfn select(ulong3 a, ulong3 b, ulong3 c);\n" |
| 37976 | "long4 __ovld __cnfn select(long4 a, long4 b, ulong4 c);\n" |
| 37977 | "ulong4 __ovld __cnfn select(ulong4 a, ulong4 b, ulong4 c);\n" |
| 37978 | "long8 __ovld __cnfn select(long8 a, long8 b, ulong8 c);\n" |
| 37979 | "ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, ulong8 c);\n" |
| 37980 | "long16 __ovld __cnfn select(long16 a, long16 b, ulong16 c);\n" |
| 37981 | "ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, ulong16 c);\n" |
| 37982 | "\n" |
| 37983 | "#ifdef cl_khr_fp64\n" |
| 37984 | "double __ovld __cnfn select(double a, double b, long c);\n" |
| 37985 | "double2 __ovld __cnfn select(double2 a, double2 b, long2 c);\n" |
| 37986 | "double3 __ovld __cnfn select(double3 a, double3 b, long3 c);\n" |
| 37987 | "double4 __ovld __cnfn select(double4 a, double4 b, long4 c);\n" |
| 37988 | "double8 __ovld __cnfn select(double8 a, double8 b, long8 c);\n" |
| 37989 | "double16 __ovld __cnfn select(double16 a, double16 b, long16 c);\n" |
| 37990 | "double __ovld __cnfn select(double a, double b, ulong c);\n" |
| 37991 | "double2 __ovld __cnfn select(double2 a, double2 b, ulong2 c);\n" |
| 37992 | "double3 __ovld __cnfn select(double3 a, double3 b, ulong3 c);\n" |
| 37993 | "double4 __ovld __cnfn select(double4 a, double4 b, ulong4 c);\n" |
| 37994 | "double8 __ovld __cnfn select(double8 a, double8 b, ulong8 c);\n" |
| 37995 | "double16 __ovld __cnfn select(double16 a, double16 b, ulong16 c);\n" |
| 37996 | "#endif //cl_khr_fp64\n" |
| 37997 | "#ifdef cl_khr_fp16\n" |
| 37998 | "half __ovld __cnfn select(half a, half b, short c);\n" |
| 37999 | "half2 __ovld __cnfn select(half2 a, half2 b, short2 c);\n" |
| 38000 | "half3 __ovld __cnfn select(half3 a, half3 b, short3 c);\n" |
| 38001 | "half4 __ovld __cnfn select(half4 a, half4 b, short4 c);\n" |
| 38002 | "half8 __ovld __cnfn select(half8 a, half8 b, short8 c);\n" |
| 38003 | "half16 __ovld __cnfn select(half16 a, half16 b, short16 c);\n" |
| 38004 | "half __ovld __cnfn select(half a, half b, ushort c);\n" |
| 38005 | "half2 __ovld __cnfn select(half2 a, half2 b, ushort2 c);\n" |
| 38006 | "half3 __ovld __cnfn select(half3 a, half3 b, ushort3 c);\n" |
| 38007 | "half4 __ovld __cnfn select(half4 a, half4 b, ushort4 c);\n" |
| 38008 | "half8 __ovld __cnfn select(half8 a, half8 b, ushort8 c);\n" |
| 38009 | "half16 __ovld __cnfn select(half16 a, half16 b, ushort16 c);\n" |
| 38010 | "#endif //cl_khr_fp16\n" |
| 38011 | "\n" |
| 38012 | "// OpenCL v1.1 s6.11.7, v1.2 s6.12.7, v2.0 s6.13.7 - Vector Data Load and Store Functions\n" |
| 38013 | "// OpenCL extensions v1.1 s9.6.6, v1.2 s9.5.6, v2.0 s9.4.6 - Vector Data Load and Store Functions for Half Type\n" |
| 38014 | "/**\n" |
| 38015 | " * Use generic type gentype to indicate the built-in data types\n" |
| 38016 | " * char, uchar, short, ushort, int, uint, long, ulong, float,\n" |
| 38017 | " * double or half.\n" |
| 38018 | " *\n" |
| 38019 | " * vloadn return sizeof (gentypen) bytes of data read from address (p + (offset * n)).\n" |
| 38020 | " *\n" |
| 38021 | " * vstoren write sizeof (gentypen) bytes given by data to address (p + (offset * n)).\n" |
| 38022 | " *\n" |
| 38023 | " * The address computed as (p + (offset * n)) must be\n" |
| 38024 | " * 8-bit aligned if gentype is char, uchar;\n" |
| 38025 | " * 16-bit aligned if gentype is short, ushort, half;\n" |
| 38026 | " * 32-bit aligned if gentype is int, uint, float;\n" |
| 38027 | " * 64-bit aligned if gentype is long, ulong, double.\n" |
| 38028 | " */\n" |
| 38029 | "\n" |
| 38030 | "char2 __ovld vload2(size_t offset, const __constant char *p);\n" |
| 38031 | "uchar2 __ovld vload2(size_t offset, const __constant uchar *p);\n" |
| 38032 | "short2 __ovld vload2(size_t offset, const __constant short *p);\n" |
| 38033 | "ushort2 __ovld vload2(size_t offset, const __constant ushort *p);\n" |
| 38034 | "int2 __ovld vload2(size_t offset, const __constant int *p);\n" |
| 38035 | "uint2 __ovld vload2(size_t offset, const __constant uint *p);\n" |
| 38036 | "long2 __ovld vload2(size_t offset, const __constant long *p);\n" |
| 38037 | "ulong2 __ovld vload2(size_t offset, const __constant ulong *p);\n" |
| 38038 | "float2 __ovld vload2(size_t offset, const __constant float *p);\n" |
| 38039 | "char3 __ovld vload3(size_t offset, const __constant char *p);\n" |
| 38040 | "uchar3 __ovld vload3(size_t offset, const __constant uchar *p);\n" |
| 38041 | "short3 __ovld vload3(size_t offset, const __constant short *p);\n" |
| 38042 | "ushort3 __ovld vload3(size_t offset, const __constant ushort *p);\n" |
| 38043 | "int3 __ovld vload3(size_t offset, const __constant int *p);\n" |
| 38044 | "uint3 __ovld vload3(size_t offset, const __constant uint *p);\n" |
| 38045 | "long3 __ovld vload3(size_t offset, const __constant long *p);\n" |
| 38046 | "ulong3 __ovld vload3(size_t offset, const __constant ulong *p);\n" |
| 38047 | "float3 __ovld vload3(size_t offset, const __constant float *p);\n" |
| 38048 | "char4 __ovld vload4(size_t offset, const __constant char *p);\n" |
| 38049 | "uchar4 __ovld vload4(size_t offset, const __constant uchar *p);\n" |
| 38050 | "short4 __ovld vload4(size_t offset, const __constant short *p);\n" |
| 38051 | "ushort4 __ovld vload4(size_t offset, const __constant ushort *p);\n" |
| 38052 | "int4 __ovld vload4(size_t offset, const __constant int *p);\n" |
| 38053 | "uint4 __ovld vload4(size_t offset, const __constant uint *p);\n" |
| 38054 | "long4 __ovld vload4(size_t offset, const __constant long *p);\n" |
| 38055 | "ulong4 __ovld vload4(size_t offset, const __constant ulong *p);\n" |
| 38056 | "float4 __ovld vload4(size_t offset, const __constant float *p);\n" |
| 38057 | "char8 __ovld vload8(size_t offset, const __constant char *p);\n" |
| 38058 | "uchar8 __ovld vload8(size_t offset, const __constant uchar *p);\n" |
| 38059 | "short8 __ovld vload8(size_t offset, const __constant short *p);\n" |
| 38060 | "ushort8 __ovld vload8(size_t offset, const __constant ushort *p);\n" |
| 38061 | "int8 __ovld vload8(size_t offset, const __constant int *p);\n" |
| 38062 | "uint8 __ovld vload8(size_t offset, const __constant uint *p);\n" |
| 38063 | "long8 __ovld vload8(size_t offset, const __constant long *p);\n" |
| 38064 | "ulong8 __ovld vload8(size_t offset, const __constant ulong *p);\n" |
| 38065 | "float8 __ovld vload8(size_t offset, const __constant float *p);\n" |
| 38066 | "char16 __ovld vload16(size_t offset, const __constant char *p);\n" |
| 38067 | "uchar16 __ovld vload16(size_t offset, const __constant uchar *p);\n" |
| 38068 | "short16 __ovld vload16(size_t offset, const __constant short *p);\n" |
| 38069 | "ushort16 __ovld vload16(size_t offset, const __constant ushort *p);\n" |
| 38070 | "int16 __ovld vload16(size_t offset, const __constant int *p);\n" |
| 38071 | "uint16 __ovld vload16(size_t offset, const __constant uint *p);\n" |
| 38072 | "long16 __ovld vload16(size_t offset, const __constant long *p);\n" |
| 38073 | "ulong16 __ovld vload16(size_t offset, const __constant ulong *p);\n" |
| 38074 | "float16 __ovld vload16(size_t offset, const __constant float *p);\n" |
| 38075 | "#ifdef cl_khr_fp64\n" |
| 38076 | "double2 __ovld vload2(size_t offset, const __constant double *p);\n" |
| 38077 | "double3 __ovld vload3(size_t offset, const __constant double *p);\n" |
| 38078 | "double4 __ovld vload4(size_t offset, const __constant double *p);\n" |
| 38079 | "double8 __ovld vload8(size_t offset, const __constant double *p);\n" |
| 38080 | "double16 __ovld vload16(size_t offset, const __constant double *p);\n" |
| 38081 | "#endif //cl_khr_fp64\n" |
| 38082 | "\n" |
| 38083 | "#ifdef cl_khr_fp16\n" |
| 38084 | "half __ovld vload(size_t offset, const __constant half *p);\n" |
| 38085 | "half2 __ovld vload2(size_t offset, const __constant half *p);\n" |
| 38086 | "half3 __ovld vload3(size_t offset, const __constant half *p);\n" |
| 38087 | "half4 __ovld vload4(size_t offset, const __constant half *p);\n" |
| 38088 | "half8 __ovld vload8(size_t offset, const __constant half *p);\n" |
| 38089 | "half16 __ovld vload16(size_t offset, const __constant half *p);\n" |
| 38090 | "#endif //cl_khr_fp16\n" |
| 38091 | "\n" |
| 38092 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 38093 | "char2 __ovld vload2(size_t offset, const char *p);\n" |
| 38094 | "uchar2 __ovld vload2(size_t offset, const uchar *p);\n" |
| 38095 | "short2 __ovld vload2(size_t offset, const short *p);\n" |
| 38096 | "ushort2 __ovld vload2(size_t offset, const ushort *p);\n" |
| 38097 | "int2 __ovld vload2(size_t offset, const int *p);\n" |
| 38098 | "uint2 __ovld vload2(size_t offset, const uint *p);\n" |
| 38099 | "long2 __ovld vload2(size_t offset, const long *p);\n" |
| 38100 | "ulong2 __ovld vload2(size_t offset, const ulong *p);\n" |
| 38101 | "float2 __ovld vload2(size_t offset, const float *p);\n" |
| 38102 | "char3 __ovld vload3(size_t offset, const char *p);\n" |
| 38103 | "uchar3 __ovld vload3(size_t offset, const uchar *p);\n" |
| 38104 | "short3 __ovld vload3(size_t offset, const short *p);\n" |
| 38105 | "ushort3 __ovld vload3(size_t offset, const ushort *p);\n" |
| 38106 | "int3 __ovld vload3(size_t offset, const int *p);\n" |
| 38107 | "uint3 __ovld vload3(size_t offset, const uint *p);\n" |
| 38108 | "long3 __ovld vload3(size_t offset, const long *p);\n" |
| 38109 | "ulong3 __ovld vload3(size_t offset, const ulong *p);\n" |
| 38110 | "float3 __ovld vload3(size_t offset, const float *p);\n" |
| 38111 | "char4 __ovld vload4(size_t offset, const char *p);\n" |
| 38112 | "uchar4 __ovld vload4(size_t offset, const uchar *p);\n" |
| 38113 | "short4 __ovld vload4(size_t offset, const short *p);\n" |
| 38114 | "ushort4 __ovld vload4(size_t offset, const ushort *p);\n" |
| 38115 | "int4 __ovld vload4(size_t offset, const int *p);\n" |
| 38116 | "uint4 __ovld vload4(size_t offset, const uint *p);\n" |
| 38117 | "long4 __ovld vload4(size_t offset, const long *p);\n" |
| 38118 | "ulong4 __ovld vload4(size_t offset, const ulong *p);\n" |
| 38119 | "float4 __ovld vload4(size_t offset, const float *p);\n" |
| 38120 | "char8 __ovld vload8(size_t offset, const char *p);\n" |
| 38121 | "uchar8 __ovld vload8(size_t offset, const uchar *p);\n" |
| 38122 | "short8 __ovld vload8(size_t offset, const short *p);\n" |
| 38123 | "ushort8 __ovld vload8(size_t offset, const ushort *p);\n" |
| 38124 | "int8 __ovld vload8(size_t offset, const int *p);\n" |
| 38125 | "uint8 __ovld vload8(size_t offset, const uint *p);\n" |
| 38126 | "long8 __ovld vload8(size_t offset, const long *p);\n" |
| 38127 | "ulong8 __ovld vload8(size_t offset, const ulong *p);\n" |
| 38128 | "float8 __ovld vload8(size_t offset, const float *p);\n" |
| 38129 | "char16 __ovld vload16(size_t offset, const char *p);\n" |
| 38130 | "uchar16 __ovld vload16(size_t offset, const uchar *p);\n" |
| 38131 | "short16 __ovld vload16(size_t offset, const short *p);\n" |
| 38132 | "ushort16 __ovld vload16(size_t offset, const ushort *p);\n" |
| 38133 | "int16 __ovld vload16(size_t offset, const int *p);\n" |
| 38134 | "uint16 __ovld vload16(size_t offset, const uint *p);\n" |
| 38135 | "long16 __ovld vload16(size_t offset, const long *p);\n" |
| 38136 | "ulong16 __ovld vload16(size_t offset, const ulong *p);\n" |
| 38137 | "float16 __ovld vload16(size_t offset, const float *p);\n" |
| 38138 | "\n" |
| 38139 | "#ifdef cl_khr_fp64\n" |
| 38140 | "double2 __ovld vload2(size_t offset, const double *p);\n" |
| 38141 | "double3 __ovld vload3(size_t offset, const double *p);\n" |
| 38142 | "double4 __ovld vload4(size_t offset, const double *p);\n" |
| 38143 | "double8 __ovld vload8(size_t offset, const double *p);\n" |
| 38144 | "double16 __ovld vload16(size_t offset, const double *p);\n" |
| 38145 | "#endif //cl_khr_fp64\n" |
| 38146 | "\n" |
| 38147 | "#ifdef cl_khr_fp16\n" |
| 38148 | "half __ovld vload(size_t offset, const half *p);\n" |
| 38149 | "half2 __ovld vload2(size_t offset, const half *p);\n" |
| 38150 | "half3 __ovld vload3(size_t offset, const half *p);\n" |
| 38151 | "half4 __ovld vload4(size_t offset, const half *p);\n" |
| 38152 | "half8 __ovld vload8(size_t offset, const half *p);\n" |
| 38153 | "half16 __ovld vload16(size_t offset, const half *p);\n" |
| 38154 | "#endif //cl_khr_fp16\n" |
| 38155 | "#else\n" |
| 38156 | "char2 __ovld vload2(size_t offset, const __global char *p);\n" |
| 38157 | "uchar2 __ovld vload2(size_t offset, const __global uchar *p);\n" |
| 38158 | "short2 __ovld vload2(size_t offset, const __global short *p);\n" |
| 38159 | "ushort2 __ovld vload2(size_t offset, const __global ushort *p);\n" |
| 38160 | "int2 __ovld vload2(size_t offset, const __global int *p);\n" |
| 38161 | "uint2 __ovld vload2(size_t offset, const __global uint *p);\n" |
| 38162 | "long2 __ovld vload2(size_t offset, const __global long *p);\n" |
| 38163 | "ulong2 __ovld vload2(size_t offset, const __global ulong *p);\n" |
| 38164 | "float2 __ovld vload2(size_t offset, const __global float *p);\n" |
| 38165 | "char3 __ovld vload3(size_t offset, const __global char *p);\n" |
| 38166 | "uchar3 __ovld vload3(size_t offset, const __global uchar *p);\n" |
| 38167 | "short3 __ovld vload3(size_t offset, const __global short *p);\n" |
| 38168 | "ushort3 __ovld vload3(size_t offset, const __global ushort *p);\n" |
| 38169 | "int3 __ovld vload3(size_t offset, const __global int *p);\n" |
| 38170 | "uint3 __ovld vload3(size_t offset, const __global uint *p);\n" |
| 38171 | "long3 __ovld vload3(size_t offset, const __global long *p);\n" |
| 38172 | "ulong3 __ovld vload3(size_t offset, const __global ulong *p);\n" |
| 38173 | "float3 __ovld vload3(size_t offset, const __global float *p);\n" |
| 38174 | "char4 __ovld vload4(size_t offset, const __global char *p);\n" |
| 38175 | "uchar4 __ovld vload4(size_t offset, const __global uchar *p);\n" |
| 38176 | "short4 __ovld vload4(size_t offset, const __global short *p);\n" |
| 38177 | "ushort4 __ovld vload4(size_t offset, const __global ushort *p);\n" |
| 38178 | "int4 __ovld vload4(size_t offset, const __global int *p);\n" |
| 38179 | "uint4 __ovld vload4(size_t offset, const __global uint *p);\n" |
| 38180 | "long4 __ovld vload4(size_t offset, const __global long *p);\n" |
| 38181 | "ulong4 __ovld vload4(size_t offset, const __global ulong *p);\n" |
| 38182 | "float4 __ovld vload4(size_t offset, const __global float *p);\n" |
| 38183 | "char8 __ovld vload8(size_t offset, const __global char *p);\n" |
| 38184 | "uchar8 __ovld vload8(size_t offset, const __global uchar *p);\n" |
| 38185 | "short8 __ovld vload8(size_t offset, const __global short *p);\n" |
| 38186 | "ushort8 __ovld vload8(size_t offset, const __global ushort *p);\n" |
| 38187 | "int8 __ovld vload8(size_t offset, const __global int *p);\n" |
| 38188 | "uint8 __ovld vload8(size_t offset, const __global uint *p);\n" |
| 38189 | "long8 __ovld vload8(size_t offset, const __global long *p);\n" |
| 38190 | "ulong8 __ovld vload8(size_t offset, const __global ulong *p);\n" |
| 38191 | "float8 __ovld vload8(size_t offset, const __global float *p);\n" |
| 38192 | "char16 __ovld vload16(size_t offset, const __global char *p);\n" |
| 38193 | "uchar16 __ovld vload16(size_t offset, const __global uchar *p);\n" |
| 38194 | "short16 __ovld vload16(size_t offset, const __global short *p);\n" |
| 38195 | "ushort16 __ovld vload16(size_t offset, const __global ushort *p);\n" |
| 38196 | "int16 __ovld vload16(size_t offset, const __global int *p);\n" |
| 38197 | "uint16 __ovld vload16(size_t offset, const __global uint *p);\n" |
| 38198 | "long16 __ovld vload16(size_t offset, const __global long *p);\n" |
| 38199 | "ulong16 __ovld vload16(size_t offset, const __global ulong *p);\n" |
| 38200 | "float16 __ovld vload16(size_t offset, const __global float *p);\n" |
| 38201 | "char2 __ovld vload2(size_t offset, const __local char *p);\n" |
| 38202 | "uchar2 __ovld vload2(size_t offset, const __local uchar *p);\n" |
| 38203 | "short2 __ovld vload2(size_t offset, const __local short *p);\n" |
| 38204 | "ushort2 __ovld vload2(size_t offset, const __local ushort *p);\n" |
| 38205 | "int2 __ovld vload2(size_t offset, const __local int *p);\n" |
| 38206 | "uint2 __ovld vload2(size_t offset, const __local uint *p);\n" |
| 38207 | "long2 __ovld vload2(size_t offset, const __local long *p);\n" |
| 38208 | "ulong2 __ovld vload2(size_t offset, const __local ulong *p);\n" |
| 38209 | "float2 __ovld vload2(size_t offset, const __local float *p);\n" |
| 38210 | "char3 __ovld vload3(size_t offset, const __local char *p);\n" |
| 38211 | "uchar3 __ovld vload3(size_t offset, const __local uchar *p);\n" |
| 38212 | "short3 __ovld vload3(size_t offset, const __local short *p);\n" |
| 38213 | "ushort3 __ovld vload3(size_t offset, const __local ushort *p);\n" |
| 38214 | "int3 __ovld vload3(size_t offset, const __local int *p);\n" |
| 38215 | "uint3 __ovld vload3(size_t offset, const __local uint *p);\n" |
| 38216 | "long3 __ovld vload3(size_t offset, const __local long *p);\n" |
| 38217 | "ulong3 __ovld vload3(size_t offset, const __local ulong *p);\n" |
| 38218 | "float3 __ovld vload3(size_t offset, const __local float *p);\n" |
| 38219 | "char4 __ovld vload4(size_t offset, const __local char *p);\n" |
| 38220 | "uchar4 __ovld vload4(size_t offset, const __local uchar *p);\n" |
| 38221 | "short4 __ovld vload4(size_t offset, const __local short *p);\n" |
| 38222 | "ushort4 __ovld vload4(size_t offset, const __local ushort *p);\n" |
| 38223 | "int4 __ovld vload4(size_t offset, const __local int *p);\n" |
| 38224 | "uint4 __ovld vload4(size_t offset, const __local uint *p);\n" |
| 38225 | "long4 __ovld vload4(size_t offset, const __local long *p);\n" |
| 38226 | "ulong4 __ovld vload4(size_t offset, const __local ulong *p);\n" |
| 38227 | "float4 __ovld vload4(size_t offset, const __local float *p);\n" |
| 38228 | "char8 __ovld vload8(size_t offset, const __local char *p);\n" |
| 38229 | "uchar8 __ovld vload8(size_t offset, const __local uchar *p);\n" |
| 38230 | "short8 __ovld vload8(size_t offset, const __local short *p);\n" |
| 38231 | "ushort8 __ovld vload8(size_t offset, const __local ushort *p);\n" |
| 38232 | "int8 __ovld vload8(size_t offset, const __local int *p);\n" |
| 38233 | "uint8 __ovld vload8(size_t offset, const __local uint *p);\n" |
| 38234 | "long8 __ovld vload8(size_t offset, const __local long *p);\n" |
| 38235 | "ulong8 __ovld vload8(size_t offset, const __local ulong *p);\n" |
| 38236 | "float8 __ovld vload8(size_t offset, const __local float *p);\n" |
| 38237 | "char16 __ovld vload16(size_t offset, const __local char *p);\n" |
| 38238 | "uchar16 __ovld vload16(size_t offset, const __local uchar *p);\n" |
| 38239 | "short16 __ovld vload16(size_t offset, const __local short *p);\n" |
| 38240 | "ushort16 __ovld vload16(size_t offset, const __local ushort *p);\n" |
| 38241 | "int16 __ovld vload16(size_t offset, const __local int *p);\n" |
| 38242 | "uint16 __ovld vload16(size_t offset, const __local uint *p);\n" |
| 38243 | "long16 __ovld vload16(size_t offset, const __local long *p);\n" |
| 38244 | "ulong16 __ovld vload16(size_t offset, const __local ulong *p);\n" |
| 38245 | "float16 __ovld vload16(size_t offset, const __local float *p);\n" |
| 38246 | "char2 __ovld vload2(size_t offset, const __private char *p);\n" |
| 38247 | "uchar2 __ovld vload2(size_t offset, const __private uchar *p);\n" |
| 38248 | "short2 __ovld vload2(size_t offset, const __private short *p);\n" |
| 38249 | "ushort2 __ovld vload2(size_t offset, const __private ushort *p);\n" |
| 38250 | "int2 __ovld vload2(size_t offset, const __private int *p);\n" |
| 38251 | "uint2 __ovld vload2(size_t offset, const __private uint *p);\n" |
| 38252 | "long2 __ovld vload2(size_t offset, const __private long *p);\n" |
| 38253 | "ulong2 __ovld vload2(size_t offset, const __private ulong *p);\n" |
| 38254 | "float2 __ovld vload2(size_t offset, const __private float *p);\n" |
| 38255 | "char3 __ovld vload3(size_t offset, const __private char *p);\n" |
| 38256 | "uchar3 __ovld vload3(size_t offset, const __private uchar *p);\n" |
| 38257 | "short3 __ovld vload3(size_t offset, const __private short *p);\n" |
| 38258 | "ushort3 __ovld vload3(size_t offset, const __private ushort *p);\n" |
| 38259 | "int3 __ovld vload3(size_t offset, const __private int *p);\n" |
| 38260 | "uint3 __ovld vload3(size_t offset, const __private uint *p);\n" |
| 38261 | "long3 __ovld vload3(size_t offset, const __private long *p);\n" |
| 38262 | "ulong3 __ovld vload3(size_t offset, const __private ulong *p);\n" |
| 38263 | "float3 __ovld vload3(size_t offset, const __private float *p);\n" |
| 38264 | "char4 __ovld vload4(size_t offset, const __private char *p);\n" |
| 38265 | "uchar4 __ovld vload4(size_t offset, const __private uchar *p);\n" |
| 38266 | "short4 __ovld vload4(size_t offset, const __private short *p);\n" |
| 38267 | "ushort4 __ovld vload4(size_t offset, const __private ushort *p);\n" |
| 38268 | "int4 __ovld vload4(size_t offset, const __private int *p);\n" |
| 38269 | "uint4 __ovld vload4(size_t offset, const __private uint *p);\n" |
| 38270 | "long4 __ovld vload4(size_t offset, const __private long *p);\n" |
| 38271 | "ulong4 __ovld vload4(size_t offset, const __private ulong *p);\n" |
| 38272 | "float4 __ovld vload4(size_t offset, const __private float *p);\n" |
| 38273 | "char8 __ovld vload8(size_t offset, const __private char *p);\n" |
| 38274 | "uchar8 __ovld vload8(size_t offset, const __private uchar *p);\n" |
| 38275 | "short8 __ovld vload8(size_t offset, const __private short *p);\n" |
| 38276 | "ushort8 __ovld vload8(size_t offset, const __private ushort *p);\n" |
| 38277 | "int8 __ovld vload8(size_t offset, const __private int *p);\n" |
| 38278 | "uint8 __ovld vload8(size_t offset, const __private uint *p);\n" |
| 38279 | "long8 __ovld vload8(size_t offset, const __private long *p);\n" |
| 38280 | "ulong8 __ovld vload8(size_t offset, const __private ulong *p);\n" |
| 38281 | "float8 __ovld vload8(size_t offset, const __private float *p);\n" |
| 38282 | "char16 __ovld vload16(size_t offset, const __private char *p);\n" |
| 38283 | "uchar16 __ovld vload16(size_t offset, const __private uchar *p);\n" |
| 38284 | "short16 __ovld vload16(size_t offset, const __private short *p);\n" |
| 38285 | "ushort16 __ovld vload16(size_t offset, const __private ushort *p);\n" |
| 38286 | "int16 __ovld vload16(size_t offset, const __private int *p);\n" |
| 38287 | "uint16 __ovld vload16(size_t offset, const __private uint *p);\n" |
| 38288 | "long16 __ovld vload16(size_t offset, const __private long *p);\n" |
| 38289 | "ulong16 __ovld vload16(size_t offset, const __private ulong *p);\n" |
| 38290 | "float16 __ovld vload16(size_t offset, const __private float *p);\n" |
| 38291 | "\n" |
| 38292 | "#ifdef cl_khr_fp64\n" |
| 38293 | "double2 __ovld vload2(size_t offset, const __global double *p);\n" |
| 38294 | "double3 __ovld vload3(size_t offset, const __global double *p);\n" |
| 38295 | "double4 __ovld vload4(size_t offset, const __global double *p);\n" |
| 38296 | "double8 __ovld vload8(size_t offset, const __global double *p);\n" |
| 38297 | "double16 __ovld vload16(size_t offset, const __global double *p);\n" |
| 38298 | "double2 __ovld vload2(size_t offset, const __local double *p);\n" |
| 38299 | "double3 __ovld vload3(size_t offset, const __local double *p);\n" |
| 38300 | "double4 __ovld vload4(size_t offset, const __local double *p);\n" |
| 38301 | "double8 __ovld vload8(size_t offset, const __local double *p);\n" |
| 38302 | "double16 __ovld vload16(size_t offset, const __local double *p);\n" |
| 38303 | "double2 __ovld vload2(size_t offset, const __private double *p);\n" |
| 38304 | "double3 __ovld vload3(size_t offset, const __private double *p);\n" |
| 38305 | "double4 __ovld vload4(size_t offset, const __private double *p);\n" |
| 38306 | "double8 __ovld vload8(size_t offset, const __private double *p);\n" |
| 38307 | "double16 __ovld vload16(size_t offset, const __private double *p);\n" |
| 38308 | "#endif //cl_khr_fp64\n" |
| 38309 | "\n" |
| 38310 | "#ifdef cl_khr_fp16\n" |
| 38311 | "half __ovld vload(size_t offset, const __global half *p);\n" |
| 38312 | "half2 __ovld vload2(size_t offset, const __global half *p);\n" |
| 38313 | "half3 __ovld vload3(size_t offset, const __global half *p);\n" |
| 38314 | "half4 __ovld vload4(size_t offset, const __global half *p);\n" |
| 38315 | "half8 __ovld vload8(size_t offset, const __global half *p);\n" |
| 38316 | "half16 __ovld vload16(size_t offset, const __global half *p);\n" |
| 38317 | "half __ovld vload(size_t offset, const __local half *p);\n" |
| 38318 | "half2 __ovld vload2(size_t offset, const __local half *p);\n" |
| 38319 | "half3 __ovld vload3(size_t offset, const __local half *p);\n" |
| 38320 | "half4 __ovld vload4(size_t offset, const __local half *p);\n" |
| 38321 | "half8 __ovld vload8(size_t offset, const __local half *p);\n" |
| 38322 | "half16 __ovld vload16(size_t offset, const __local half *p);\n" |
| 38323 | "half __ovld vload(size_t offset, const __private half *p);\n" |
| 38324 | "half2 __ovld vload2(size_t offset, const __private half *p);\n" |
| 38325 | "half3 __ovld vload3(size_t offset, const __private half *p);\n" |
| 38326 | "half4 __ovld vload4(size_t offset, const __private half *p);\n" |
| 38327 | "half8 __ovld vload8(size_t offset, const __private half *p);\n" |
| 38328 | "half16 __ovld vload16(size_t offset, const __private half *p);\n" |
| 38329 | "#endif //cl_khr_fp16\n" |
| 38330 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 38331 | "\n" |
| 38332 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 38333 | "void __ovld vstore2(char2 data, size_t offset, char *p);\n" |
| 38334 | "void __ovld vstore2(uchar2 data, size_t offset, uchar *p);\n" |
| 38335 | "void __ovld vstore2(short2 data, size_t offset, short *p);\n" |
| 38336 | "void __ovld vstore2(ushort2 data, size_t offset, ushort *p);\n" |
| 38337 | "void __ovld vstore2(int2 data, size_t offset, int *p);\n" |
| 38338 | "void __ovld vstore2(uint2 data, size_t offset, uint *p);\n" |
| 38339 | "void __ovld vstore2(long2 data, size_t offset, long *p);\n" |
| 38340 | "void __ovld vstore2(ulong2 data, size_t offset, ulong *p);\n" |
| 38341 | "void __ovld vstore2(float2 data, size_t offset, float *p);\n" |
| 38342 | "void __ovld vstore3(char3 data, size_t offset, char *p);\n" |
| 38343 | "void __ovld vstore3(uchar3 data, size_t offset, uchar *p);\n" |
| 38344 | "void __ovld vstore3(short3 data, size_t offset, short *p);\n" |
| 38345 | "void __ovld vstore3(ushort3 data, size_t offset, ushort *p);\n" |
| 38346 | "void __ovld vstore3(int3 data, size_t offset, int *p);\n" |
| 38347 | "void __ovld vstore3(uint3 data, size_t offset, uint *p);\n" |
| 38348 | "void __ovld vstore3(long3 data, size_t offset, long *p);\n" |
| 38349 | "void __ovld vstore3(ulong3 data, size_t offset, ulong *p);\n" |
| 38350 | "void __ovld vstore3(float3 data, size_t offset, float *p);\n" |
| 38351 | "void __ovld vstore4(char4 data, size_t offset, char *p);\n" |
| 38352 | "void __ovld vstore4(uchar4 data, size_t offset, uchar *p);\n" |
| 38353 | "void __ovld vstore4(short4 data, size_t offset, short *p);\n" |
| 38354 | "void __ovld vstore4(ushort4 data, size_t offset, ushort *p);\n" |
| 38355 | "void __ovld vstore4(int4 data, size_t offset, int *p);\n" |
| 38356 | "void __ovld vstore4(uint4 data, size_t offset, uint *p);\n" |
| 38357 | "void __ovld vstore4(long4 data, size_t offset, long *p);\n" |
| 38358 | "void __ovld vstore4(ulong4 data, size_t offset, ulong *p);\n" |
| 38359 | "void __ovld vstore4(float4 data, size_t offset, float *p);\n" |
| 38360 | "void __ovld vstore8(char8 data, size_t offset, char *p);\n" |
| 38361 | "void __ovld vstore8(uchar8 data, size_t offset, uchar *p);\n" |
| 38362 | "void __ovld vstore8(short8 data, size_t offset, short *p);\n" |
| 38363 | "void __ovld vstore8(ushort8 data, size_t offset, ushort *p);\n" |
| 38364 | "void __ovld vstore8(int8 data, size_t offset, int *p);\n" |
| 38365 | "void __ovld vstore8(uint8 data, size_t offset, uint *p);\n" |
| 38366 | "void __ovld vstore8(long8 data, size_t offset, long *p);\n" |
| 38367 | "void __ovld vstore8(ulong8 data, size_t offset, ulong *p);\n" |
| 38368 | "void __ovld vstore8(float8 data, size_t offset, float *p);\n" |
| 38369 | "void __ovld vstore16(char16 data, size_t offset, char *p);\n" |
| 38370 | "void __ovld vstore16(uchar16 data, size_t offset, uchar *p);\n" |
| 38371 | "void __ovld vstore16(short16 data, size_t offset, short *p);\n" |
| 38372 | "void __ovld vstore16(ushort16 data, size_t offset, ushort *p);\n" |
| 38373 | "void __ovld vstore16(int16 data, size_t offset, int *p);\n" |
| 38374 | "void __ovld vstore16(uint16 data, size_t offset, uint *p);\n" |
| 38375 | "void __ovld vstore16(long16 data, size_t offset, long *p);\n" |
| 38376 | "void __ovld vstore16(ulong16 data, size_t offset, ulong *p);\n" |
| 38377 | "void __ovld vstore16(float16 data, size_t offset, float *p);\n" |
| 38378 | "#ifdef cl_khr_fp64\n" |
| 38379 | "void __ovld vstore2(double2 data, size_t offset, double *p);\n" |
| 38380 | "void __ovld vstore3(double3 data, size_t offset, double *p);\n" |
| 38381 | "void __ovld vstore4(double4 data, size_t offset, double *p);\n" |
| 38382 | "void __ovld vstore8(double8 data, size_t offset, double *p);\n" |
| 38383 | "void __ovld vstore16(double16 data, size_t offset, double *p);\n" |
| 38384 | "#endif //cl_khr_fp64\n" |
| 38385 | "#ifdef cl_khr_fp16\n" |
| 38386 | "void __ovld vstore(half data, size_t offset, half *p);\n" |
| 38387 | "void __ovld vstore2(half2 data, size_t offset, half *p);\n" |
| 38388 | "void __ovld vstore3(half3 data, size_t offset, half *p);\n" |
| 38389 | "void __ovld vstore4(half4 data, size_t offset, half *p);\n" |
| 38390 | "void __ovld vstore8(half8 data, size_t offset, half *p);\n" |
| 38391 | "void __ovld vstore16(half16 data, size_t offset, half *p);\n" |
| 38392 | "#endif //cl_khr_fp16\n" |
| 38393 | "#else\n" |
| 38394 | "void __ovld vstore2(char2 data, size_t offset, __global char *p);\n" |
| 38395 | "void __ovld vstore2(uchar2 data, size_t offset, __global uchar *p);\n" |
| 38396 | "void __ovld vstore2(short2 data, size_t offset, __global short *p);\n" |
| 38397 | "void __ovld vstore2(ushort2 data, size_t offset, __global ushort *p);\n" |
| 38398 | "void __ovld vstore2(int2 data, size_t offset, __global int *p);\n" |
| 38399 | "void __ovld vstore2(uint2 data, size_t offset, __global uint *p);\n" |
| 38400 | "void __ovld vstore2(long2 data, size_t offset, __global long *p);\n" |
| 38401 | "void __ovld vstore2(ulong2 data, size_t offset, __global ulong *p);\n" |
| 38402 | "void __ovld vstore2(float2 data, size_t offset, __global float *p);\n" |
| 38403 | "void __ovld vstore3(char3 data, size_t offset, __global char *p);\n" |
| 38404 | "void __ovld vstore3(uchar3 data, size_t offset, __global uchar *p);\n" |
| 38405 | "void __ovld vstore3(short3 data, size_t offset, __global short *p);\n" |
| 38406 | "void __ovld vstore3(ushort3 data, size_t offset, __global ushort *p);\n" |
| 38407 | "void __ovld vstore3(int3 data, size_t offset, __global int *p);\n" |
| 38408 | "void __ovld vstore3(uint3 data, size_t offset, __global uint *p);\n" |
| 38409 | "void __ovld vstore3(long3 data, size_t offset, __global long *p);\n" |
| 38410 | "void __ovld vstore3(ulong3 data, size_t offset, __global ulong *p);\n" |
| 38411 | "void __ovld vstore3(float3 data, size_t offset, __global float *p);\n" |
| 38412 | "void __ovld vstore4(char4 data, size_t offset, __global char *p);\n" |
| 38413 | "void __ovld vstore4(uchar4 data, size_t offset, __global uchar *p);\n" |
| 38414 | "void __ovld vstore4(short4 data, size_t offset, __global short *p);\n" |
| 38415 | "void __ovld vstore4(ushort4 data, size_t offset, __global ushort *p);\n" |
| 38416 | "void __ovld vstore4(int4 data, size_t offset, __global int *p);\n" |
| 38417 | "void __ovld vstore4(uint4 data, size_t offset, __global uint *p);\n" |
| 38418 | "void __ovld vstore4(long4 data, size_t offset, __global long *p);\n" |
| 38419 | "void __ovld vstore4(ulong4 data, size_t offset, __global ulong *p);\n" |
| 38420 | "void __ovld vstore4(float4 data, size_t offset, __global float *p);\n" |
| 38421 | "void __ovld vstore8(char8 data, size_t offset, __global char *p);\n" |
| 38422 | "void __ovld vstore8(uchar8 data, size_t offset, __global uchar *p);\n" |
| 38423 | "void __ovld vstore8(short8 data, size_t offset, __global short *p);\n" |
| 38424 | "void __ovld vstore8(ushort8 data, size_t offset, __global ushort *p);\n" |
| 38425 | "void __ovld vstore8(int8 data, size_t offset, __global int *p);\n" |
| 38426 | "void __ovld vstore8(uint8 data, size_t offset, __global uint *p);\n" |
| 38427 | "void __ovld vstore8(long8 data, size_t offset, __global long *p);\n" |
| 38428 | "void __ovld vstore8(ulong8 data, size_t offset, __global ulong *p);\n" |
| 38429 | "void __ovld vstore8(float8 data, size_t offset, __global float *p);\n" |
| 38430 | "void __ovld vstore16(char16 data, size_t offset, __global char *p);\n" |
| 38431 | "void __ovld vstore16(uchar16 data, size_t offset, __global uchar *p);\n" |
| 38432 | "void __ovld vstore16(short16 data, size_t offset, __global short *p);\n" |
| 38433 | "void __ovld vstore16(ushort16 data, size_t offset, __global ushort *p);\n" |
| 38434 | "void __ovld vstore16(int16 data, size_t offset, __global int *p);\n" |
| 38435 | "void __ovld vstore16(uint16 data, size_t offset, __global uint *p);\n" |
| 38436 | "void __ovld vstore16(long16 data, size_t offset, __global long *p);\n" |
| 38437 | "void __ovld vstore16(ulong16 data, size_t offset, __global ulong *p);\n" |
| 38438 | "void __ovld vstore16(float16 data, size_t offset, __global float *p);\n" |
| 38439 | "void __ovld vstore2(char2 data, size_t offset, __local char *p);\n" |
| 38440 | "void __ovld vstore2(uchar2 data, size_t offset, __local uchar *p);\n" |
| 38441 | "void __ovld vstore2(short2 data, size_t offset, __local short *p);\n" |
| 38442 | "void __ovld vstore2(ushort2 data, size_t offset, __local ushort *p);\n" |
| 38443 | "void __ovld vstore2(int2 data, size_t offset, __local int *p);\n" |
| 38444 | "void __ovld vstore2(uint2 data, size_t offset, __local uint *p);\n" |
| 38445 | "void __ovld vstore2(long2 data, size_t offset, __local long *p);\n" |
| 38446 | "void __ovld vstore2(ulong2 data, size_t offset, __local ulong *p);\n" |
| 38447 | "void __ovld vstore2(float2 data, size_t offset, __local float *p);\n" |
| 38448 | "void __ovld vstore3(char3 data, size_t offset, __local char *p);\n" |
| 38449 | "void __ovld vstore3(uchar3 data, size_t offset, __local uchar *p);\n" |
| 38450 | "void __ovld vstore3(short3 data, size_t offset, __local short *p);\n" |
| 38451 | "void __ovld vstore3(ushort3 data, size_t offset, __local ushort *p);\n" |
| 38452 | "void __ovld vstore3(int3 data, size_t offset, __local int *p);\n" |
| 38453 | "void __ovld vstore3(uint3 data, size_t offset, __local uint *p);\n" |
| 38454 | "void __ovld vstore3(long3 data, size_t offset, __local long *p);\n" |
| 38455 | "void __ovld vstore3(ulong3 data, size_t offset, __local ulong *p);\n" |
| 38456 | "void __ovld vstore3(float3 data, size_t offset, __local float *p);\n" |
| 38457 | "void __ovld vstore4(char4 data, size_t offset, __local char *p);\n" |
| 38458 | "void __ovld vstore4(uchar4 data, size_t offset, __local uchar *p);\n" |
| 38459 | "void __ovld vstore4(short4 data, size_t offset, __local short *p);\n" |
| 38460 | "void __ovld vstore4(ushort4 data, size_t offset, __local ushort *p);\n" |
| 38461 | "void __ovld vstore4(int4 data, size_t offset, __local int *p);\n" |
| 38462 | "void __ovld vstore4(uint4 data, size_t offset, __local uint *p);\n" |
| 38463 | "void __ovld vstore4(long4 data, size_t offset, __local long *p);\n" |
| 38464 | "void __ovld vstore4(ulong4 data, size_t offset, __local ulong *p);\n" |
| 38465 | "void __ovld vstore4(float4 data, size_t offset, __local float *p);\n" |
| 38466 | "void __ovld vstore8(char8 data, size_t offset, __local char *p);\n" |
| 38467 | "void __ovld vstore8(uchar8 data, size_t offset, __local uchar *p);\n" |
| 38468 | "void __ovld vstore8(short8 data, size_t offset, __local short *p);\n" |
| 38469 | "void __ovld vstore8(ushort8 data, size_t offset, __local ushort *p);\n" |
| 38470 | "void __ovld vstore8(int8 data, size_t offset, __local int *p);\n" |
| 38471 | "void __ovld vstore8(uint8 data, size_t offset, __local uint *p);\n" |
| 38472 | "void __ovld vstore8(long8 data, size_t offset, __local long *p);\n" |
| 38473 | "void __ovld vstore8(ulong8 data, size_t offset, __local ulong *p);\n" |
| 38474 | "void __ovld vstore8(float8 data, size_t offset, __local float *p);\n" |
| 38475 | "void __ovld vstore16(char16 data, size_t offset, __local char *p);\n" |
| 38476 | "void __ovld vstore16(uchar16 data, size_t offset, __local uchar *p);\n" |
| 38477 | "void __ovld vstore16(short16 data, size_t offset, __local short *p);\n" |
| 38478 | "void __ovld vstore16(ushort16 data, size_t offset, __local ushort *p);\n" |
| 38479 | "void __ovld vstore16(int16 data, size_t offset, __local int *p);\n" |
| 38480 | "void __ovld vstore16(uint16 data, size_t offset, __local uint *p);\n" |
| 38481 | "void __ovld vstore16(long16 data, size_t offset, __local long *p);\n" |
| 38482 | "void __ovld vstore16(ulong16 data, size_t offset, __local ulong *p);\n" |
| 38483 | "void __ovld vstore16(float16 data, size_t offset, __local float *p);\n" |
| 38484 | "void __ovld vstore2(char2 data, size_t offset, __private char *p);\n" |
| 38485 | "void __ovld vstore2(uchar2 data, size_t offset, __private uchar *p);\n" |
| 38486 | "void __ovld vstore2(short2 data, size_t offset, __private short *p);\n" |
| 38487 | "void __ovld vstore2(ushort2 data, size_t offset, __private ushort *p);\n" |
| 38488 | "void __ovld vstore2(int2 data, size_t offset, __private int *p);\n" |
| 38489 | "void __ovld vstore2(uint2 data, size_t offset, __private uint *p);\n" |
| 38490 | "void __ovld vstore2(long2 data, size_t offset, __private long *p);\n" |
| 38491 | "void __ovld vstore2(ulong2 data, size_t offset, __private ulong *p);\n" |
| 38492 | "void __ovld vstore2(float2 data, size_t offset, __private float *p);\n" |
| 38493 | "void __ovld vstore3(char3 data, size_t offset, __private char *p);\n" |
| 38494 | "void __ovld vstore3(uchar3 data, size_t offset, __private uchar *p);\n" |
| 38495 | "void __ovld vstore3(short3 data, size_t offset, __private short *p);\n" |
| 38496 | "void __ovld vstore3(ushort3 data, size_t offset, __private ushort *p);\n" |
| 38497 | "void __ovld vstore3(int3 data, size_t offset, __private int *p);\n" |
| 38498 | "void __ovld vstore3(uint3 data, size_t offset, __private uint *p);\n" |
| 38499 | "void __ovld vstore3(long3 data, size_t offset, __private long *p);\n" |
| 38500 | "void __ovld vstore3(ulong3 data, size_t offset, __private ulong *p);\n" |
| 38501 | "void __ovld vstore3(float3 data, size_t offset, __private float *p);\n" |
| 38502 | "void __ovld vstore4(char4 data, size_t offset, __private char *p);\n" |
| 38503 | "void __ovld vstore4(uchar4 data, size_t offset, __private uchar *p);\n" |
| 38504 | "void __ovld vstore4(short4 data, size_t offset, __private short *p);\n" |
| 38505 | "void __ovld vstore4(ushort4 data, size_t offset, __private ushort *p);\n" |
| 38506 | "void __ovld vstore4(int4 data, size_t offset, __private int *p);\n" |
| 38507 | "void __ovld vstore4(uint4 data, size_t offset, __private uint *p);\n" |
| 38508 | "void __ovld vstore4(long4 data, size_t offset, __private long *p);\n" |
| 38509 | "void __ovld vstore4(ulong4 data, size_t offset, __private ulong *p);\n" |
| 38510 | "void __ovld vstore4(float4 data, size_t offset, __private float *p);\n" |
| 38511 | "void __ovld vstore8(char8 data, size_t offset, __private char *p);\n" |
| 38512 | "void __ovld vstore8(uchar8 data, size_t offset, __private uchar *p);\n" |
| 38513 | "void __ovld vstore8(short8 data, size_t offset, __private short *p);\n" |
| 38514 | "void __ovld vstore8(ushort8 data, size_t offset, __private ushort *p);\n" |
| 38515 | "void __ovld vstore8(int8 data, size_t offset, __private int *p);\n" |
| 38516 | "void __ovld vstore8(uint8 data, size_t offset, __private uint *p);\n" |
| 38517 | "void __ovld vstore8(long8 data, size_t offset, __private long *p);\n" |
| 38518 | "void __ovld vstore8(ulong8 data, size_t offset, __private ulong *p);\n" |
| 38519 | "void __ovld vstore8(float8 data, size_t offset, __private float *p);\n" |
| 38520 | "void __ovld vstore16(char16 data, size_t offset, __private char *p);\n" |
| 38521 | "void __ovld vstore16(uchar16 data, size_t offset, __private uchar *p);\n" |
| 38522 | "void __ovld vstore16(short16 data, size_t offset, __private short *p);\n" |
| 38523 | "void __ovld vstore16(ushort16 data, size_t offset, __private ushort *p);\n" |
| 38524 | "void __ovld vstore16(int16 data, size_t offset, __private int *p);\n" |
| 38525 | "void __ovld vstore16(uint16 data, size_t offset, __private uint *p);\n" |
| 38526 | "void __ovld vstore16(long16 data, size_t offset, __private long *p);\n" |
| 38527 | "void __ovld vstore16(ulong16 data, size_t offset, __private ulong *p);\n" |
| 38528 | "void __ovld vstore16(float16 data, size_t offset, __private float *p);\n" |
| 38529 | "#ifdef cl_khr_fp64\n" |
| 38530 | "void __ovld vstore2(double2 data, size_t offset, __global double *p);\n" |
| 38531 | "void __ovld vstore3(double3 data, size_t offset, __global double *p);\n" |
| 38532 | "void __ovld vstore4(double4 data, size_t offset, __global double *p);\n" |
| 38533 | "void __ovld vstore8(double8 data, size_t offset, __global double *p);\n" |
| 38534 | "void __ovld vstore16(double16 data, size_t offset, __global double *p);\n" |
| 38535 | "void __ovld vstore2(double2 data, size_t offset, __local double *p);\n" |
| 38536 | "void __ovld vstore3(double3 data, size_t offset, __local double *p);\n" |
| 38537 | "void __ovld vstore4(double4 data, size_t offset, __local double *p);\n" |
| 38538 | "void __ovld vstore8(double8 data, size_t offset, __local double *p);\n" |
| 38539 | "void __ovld vstore16(double16 data, size_t offset, __local double *p);\n" |
| 38540 | "void __ovld vstore2(double2 data, size_t offset, __private double *p);\n" |
| 38541 | "void __ovld vstore3(double3 data, size_t offset, __private double *p);\n" |
| 38542 | "void __ovld vstore4(double4 data, size_t offset, __private double *p);\n" |
| 38543 | "void __ovld vstore8(double8 data, size_t offset, __private double *p);\n" |
| 38544 | "void __ovld vstore16(double16 data, size_t offset, __private double *p);\n" |
| 38545 | "#endif //cl_khr_fp64\n" |
| 38546 | "#ifdef cl_khr_fp16\n" |
| 38547 | "void __ovld vstore(half data, size_t offset, __global half *p);\n" |
| 38548 | "void __ovld vstore2(half2 data, size_t offset, __global half *p);\n" |
| 38549 | "void __ovld vstore3(half3 data, size_t offset, __global half *p);\n" |
| 38550 | "void __ovld vstore4(half4 data, size_t offset, __global half *p);\n" |
| 38551 | "void __ovld vstore8(half8 data, size_t offset, __global half *p);\n" |
| 38552 | "void __ovld vstore16(half16 data, size_t offset, __global half *p);\n" |
| 38553 | "void __ovld vstore(half data, size_t offset, __local half *p);\n" |
| 38554 | "void __ovld vstore2(half2 data, size_t offset, __local half *p);\n" |
| 38555 | "void __ovld vstore3(half3 data, size_t offset, __local half *p);\n" |
| 38556 | "void __ovld vstore4(half4 data, size_t offset, __local half *p);\n" |
| 38557 | "void __ovld vstore8(half8 data, size_t offset, __local half *p);\n" |
| 38558 | "void __ovld vstore16(half16 data, size_t offset, __local half *p);\n" |
| 38559 | "void __ovld vstore(half data, size_t offset, __private half *p);\n" |
| 38560 | "void __ovld vstore2(half2 data, size_t offset, __private half *p);\n" |
| 38561 | "void __ovld vstore3(half3 data, size_t offset, __private half *p);\n" |
| 38562 | "void __ovld vstore4(half4 data, size_t offset, __private half *p);\n" |
| 38563 | "void __ovld vstore8(half8 data, size_t offset, __private half *p);\n" |
| 38564 | "void __ovld vstore16(half16 data, size_t offset, __private half *p);\n" |
| 38565 | "#endif //cl_khr_fp16\n" |
| 38566 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 38567 | "\n" |
| 38568 | "/**\n" |
| 38569 | " * Read sizeof (half) bytes of data from address\n" |
| 38570 | " * (p + offset). The data read is interpreted as a\n" |
| 38571 | " * half value. The half value is converted to a\n" |
| 38572 | " * float value and the float value is returned.\n" |
| 38573 | " * The read address computed as (p + offset)\n" |
| 38574 | " * must be 16-bit aligned.\n" |
| 38575 | " */\n" |
| 38576 | "float __ovld vload_half(size_t offset, const __constant half *p);\n" |
| 38577 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 38578 | "float __ovld vload_half(size_t offset, const half *p);\n" |
| 38579 | "#else\n" |
| 38580 | "float __ovld vload_half(size_t offset, const __global half *p);\n" |
| 38581 | "float __ovld vload_half(size_t offset, const __local half *p);\n" |
| 38582 | "float __ovld vload_half(size_t offset, const __private half *p);\n" |
| 38583 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 38584 | "\n" |
| 38585 | "/**\n" |
| 38586 | " * Read sizeof (halfn) bytes of data from address\n" |
| 38587 | " * (p + (offset * n)). The data read is interpreted\n" |
| 38588 | " * as a halfn value. The halfn value read is\n" |
| 38589 | " * converted to a floatn value and the floatn\n" |
| 38590 | " * value is returned. The read address computed\n" |
| 38591 | " * as (p + (offset * n)) must be 16-bit aligned.\n" |
| 38592 | " */\n" |
| 38593 | "float2 __ovld vload_half2(size_t offset, const __constant half *p);\n" |
| 38594 | "float3 __ovld vload_half3(size_t offset, const __constant half *p);\n" |
| 38595 | "float4 __ovld vload_half4(size_t offset, const __constant half *p);\n" |
| 38596 | "float8 __ovld vload_half8(size_t offset, const __constant half *p);\n" |
| 38597 | "float16 __ovld vload_half16(size_t offset, const __constant half *p);\n" |
| 38598 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 38599 | "float2 __ovld vload_half2(size_t offset, const half *p);\n" |
| 38600 | "float3 __ovld vload_half3(size_t offset, const half *p);\n" |
| 38601 | "float4 __ovld vload_half4(size_t offset, const half *p);\n" |
| 38602 | "float8 __ovld vload_half8(size_t offset, const half *p);\n" |
| 38603 | "float16 __ovld vload_half16(size_t offset, const half *p);\n" |
| 38604 | "#else\n" |
| 38605 | "float2 __ovld vload_half2(size_t offset, const __global half *p);\n" |
| 38606 | "float3 __ovld vload_half3(size_t offset, const __global half *p);\n" |
| 38607 | "float4 __ovld vload_half4(size_t offset, const __global half *p);\n" |
| 38608 | "float8 __ovld vload_half8(size_t offset, const __global half *p);\n" |
| 38609 | "float16 __ovld vload_half16(size_t offset, const __global half *p);\n" |
| 38610 | "float2 __ovld vload_half2(size_t offset, const __local half *p);\n" |
| 38611 | "float3 __ovld vload_half3(size_t offset, const __local half *p);\n" |
| 38612 | "float4 __ovld vload_half4(size_t offset, const __local half *p);\n" |
| 38613 | "float8 __ovld vload_half8(size_t offset, const __local half *p);\n" |
| 38614 | "float16 __ovld vload_half16(size_t offset, const __local half *p);\n" |
| 38615 | "float2 __ovld vload_half2(size_t offset, const __private half *p);\n" |
| 38616 | "float3 __ovld vload_half3(size_t offset, const __private half *p);\n" |
| 38617 | "float4 __ovld vload_half4(size_t offset, const __private half *p);\n" |
| 38618 | "float8 __ovld vload_half8(size_t offset, const __private half *p);\n" |
| 38619 | "float16 __ovld vload_half16(size_t offset, const __private half *p);\n" |
| 38620 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 38621 | "\n" |
| 38622 | "/**\n" |
| 38623 | " * The float value given by data is first\n" |
| 38624 | " * converted to a half value using the appropriate\n" |
| 38625 | " * rounding mode. The half value is then written\n" |
| 38626 | " * to address computed as (p + offset). The\n" |
| 38627 | " * address computed as (p + offset) must be 16-\n" |
| 38628 | " * bit aligned.\n" |
| 38629 | " * vstore_half use the current rounding mode.\n" |
| 38630 | " * The default current rounding mode is round to\n" |
| 38631 | " * nearest even.\n" |
| 38632 | " */\n" |
| 38633 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 38634 | "void __ovld vstore_half(float data, size_t offset, half *p);\n" |
| 38635 | "void __ovld vstore_half_rte(float data, size_t offset, half *p);\n" |
| 38636 | "void __ovld vstore_half_rtz(float data, size_t offset, half *p);\n" |
| 38637 | "void __ovld vstore_half_rtp(float data, size_t offset, half *p);\n" |
| 38638 | "void __ovld vstore_half_rtn(float data, size_t offset, half *p);\n" |
| 38639 | "#ifdef cl_khr_fp64\n" |
| 38640 | "void __ovld vstore_half(double data, size_t offset, half *p);\n" |
| 38641 | "void __ovld vstore_half_rte(double data, size_t offset, half *p);\n" |
| 38642 | "void __ovld vstore_half_rtz(double data, size_t offset, half *p);\n" |
| 38643 | "void __ovld vstore_half_rtp(double data, size_t offset, half *p);\n" |
| 38644 | "void __ovld vstore_half_rtn(double data, size_t offset, half *p);\n" |
| 38645 | "#endif //cl_khr_fp64\n" |
| 38646 | "#else\n" |
| 38647 | "void __ovld vstore_half(float data, size_t offset, __global half *p);\n" |
| 38648 | "void __ovld vstore_half_rte(float data, size_t offset, __global half *p);\n" |
| 38649 | "void __ovld vstore_half_rtz(float data, size_t offset, __global half *p);\n" |
| 38650 | "void __ovld vstore_half_rtp(float data, size_t offset, __global half *p);\n" |
| 38651 | "void __ovld vstore_half_rtn(float data, size_t offset, __global half *p);\n" |
| 38652 | "void __ovld vstore_half(float data, size_t offset, __local half *p);\n" |
| 38653 | "void __ovld vstore_half_rte(float data, size_t offset, __local half *p);\n" |
| 38654 | "void __ovld vstore_half_rtz(float data, size_t offset, __local half *p);\n" |
| 38655 | "void __ovld vstore_half_rtp(float data, size_t offset, __local half *p);\n" |
| 38656 | "void __ovld vstore_half_rtn(float data, size_t offset, __local half *p);\n" |
| 38657 | "void __ovld vstore_half(float data, size_t offset, __private half *p);\n" |
| 38658 | "void __ovld vstore_half_rte(float data, size_t offset, __private half *p);\n" |
| 38659 | "void __ovld vstore_half_rtz(float data, size_t offset, __private half *p);\n" |
| 38660 | "void __ovld vstore_half_rtp(float data, size_t offset, __private half *p);\n" |
| 38661 | "void __ovld vstore_half_rtn(float data, size_t offset, __private half *p);\n" |
| 38662 | "#ifdef cl_khr_fp64\n" |
| 38663 | "void __ovld vstore_half(double data, size_t offset, __global half *p);\n" |
| 38664 | "void __ovld vstore_half_rte(double data, size_t offset, __global half *p);\n" |
| 38665 | "void __ovld vstore_half_rtz(double data, size_t offset, __global half *p);\n" |
| 38666 | "void __ovld vstore_half_rtp(double data, size_t offset, __global half *p);\n" |
| 38667 | "void __ovld vstore_half_rtn(double data, size_t offset, __global half *p);\n" |
| 38668 | "void __ovld vstore_half(double data, size_t offset, __local half *p);\n" |
| 38669 | "void __ovld vstore_half_rte(double data, size_t offset, __local half *p);\n" |
| 38670 | "void __ovld vstore_half_rtz(double data, size_t offset, __local half *p);\n" |
| 38671 | "void __ovld vstore_half_rtp(double data, size_t offset, __local half *p);\n" |
| 38672 | "void __ovld vstore_half_rtn(double data, size_t offset, __local half *p);\n" |
| 38673 | "void __ovld vstore_half(double data, size_t offset, __private half *p);\n" |
| 38674 | "void __ovld vstore_half_rte(double data, size_t offset, __private half *p);\n" |
| 38675 | "void __ovld vstore_half_rtz(double data, size_t offset, __private half *p);\n" |
| 38676 | "void __ovld vstore_half_rtp(double data, size_t offset, __private half *p);\n" |
| 38677 | "void __ovld vstore_half_rtn(double data, size_t offset, __private half *p);\n" |
| 38678 | "#endif //cl_khr_fp64\n" |
| 38679 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 38680 | "\n" |
| 38681 | "/**\n" |
| 38682 | " * The floatn value given by data is converted to\n" |
| 38683 | " * a halfn value using the appropriate rounding\n" |
| 38684 | " * mode. The halfn value is then written to\n" |
| 38685 | " * address computed as (p + (offset * n)). The\n" |
| 38686 | " * address computed as (p + (offset * n)) must be\n" |
| 38687 | " * 16-bit aligned.\n" |
| 38688 | " * vstore_halfn uses the current rounding mode.\n" |
| 38689 | " * The default current rounding mode is round to\n" |
| 38690 | " * nearest even.\n" |
| 38691 | " */\n" |
| 38692 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 38693 | "void __ovld vstore_half2(float2 data, size_t offset, half *p);\n" |
| 38694 | "void __ovld vstore_half3(float3 data, size_t offset, half *p);\n" |
| 38695 | "void __ovld vstore_half4(float4 data, size_t offset, half *p);\n" |
| 38696 | "void __ovld vstore_half8(float8 data, size_t offset, half *p);\n" |
| 38697 | "void __ovld vstore_half16(float16 data, size_t offset, half *p);\n" |
| 38698 | "void __ovld vstore_half2_rte(float2 data, size_t offset, half *p);\n" |
| 38699 | "void __ovld vstore_half3_rte(float3 data, size_t offset, half *p);\n" |
| 38700 | "void __ovld vstore_half4_rte(float4 data, size_t offset, half *p);\n" |
| 38701 | "void __ovld vstore_half8_rte(float8 data, size_t offset, half *p);\n" |
| 38702 | "void __ovld vstore_half16_rte(float16 data, size_t offset, half *p);\n" |
| 38703 | "void __ovld vstore_half2_rtz(float2 data, size_t offset, half *p);\n" |
| 38704 | "void __ovld vstore_half3_rtz(float3 data, size_t offset, half *p);\n" |
| 38705 | "void __ovld vstore_half4_rtz(float4 data, size_t offset, half *p);\n" |
| 38706 | "void __ovld vstore_half8_rtz(float8 data, size_t offset, half *p);\n" |
| 38707 | "void __ovld vstore_half16_rtz(float16 data, size_t offset, half *p);\n" |
| 38708 | "void __ovld vstore_half2_rtp(float2 data, size_t offset, half *p);\n" |
| 38709 | "void __ovld vstore_half3_rtp(float3 data, size_t offset, half *p);\n" |
| 38710 | "void __ovld vstore_half4_rtp(float4 data, size_t offset, half *p);\n" |
| 38711 | "void __ovld vstore_half8_rtp(float8 data, size_t offset, half *p);\n" |
| 38712 | "void __ovld vstore_half16_rtp(float16 data, size_t offset, half *p);\n" |
| 38713 | "void __ovld vstore_half2_rtn(float2 data, size_t offset, half *p);\n" |
| 38714 | "void __ovld vstore_half3_rtn(float3 data, size_t offset, half *p);\n" |
| 38715 | "void __ovld vstore_half4_rtn(float4 data, size_t offset, half *p);\n" |
| 38716 | "void __ovld vstore_half8_rtn(float8 data, size_t offset, half *p);\n" |
| 38717 | "void __ovld vstore_half16_rtn(float16 data, size_t offset, half *p);\n" |
| 38718 | "#ifdef cl_khr_fp64\n" |
| 38719 | "void __ovld vstore_half2(double2 data, size_t offset, half *p);\n" |
| 38720 | "void __ovld vstore_half3(double3 data, size_t offset, half *p);\n" |
| 38721 | "void __ovld vstore_half4(double4 data, size_t offset, half *p);\n" |
| 38722 | "void __ovld vstore_half8(double8 data, size_t offset, half *p);\n" |
| 38723 | "void __ovld vstore_half16(double16 data, size_t offset, half *p);\n" |
| 38724 | "void __ovld vstore_half2_rte(double2 data, size_t offset, half *p);\n" |
| 38725 | "void __ovld vstore_half3_rte(double3 data, size_t offset, half *p);\n" |
| 38726 | "void __ovld vstore_half4_rte(double4 data, size_t offset, half *p);\n" |
| 38727 | "void __ovld vstore_half8_rte(double8 data, size_t offset, half *p);\n" |
| 38728 | "void __ovld vstore_half16_rte(double16 data, size_t offset, half *p);\n" |
| 38729 | "void __ovld vstore_half2_rtz(double2 data, size_t offset, half *p);\n" |
| 38730 | "void __ovld vstore_half3_rtz(double3 data, size_t offset, half *p);\n" |
| 38731 | "void __ovld vstore_half4_rtz(double4 data, size_t offset, half *p);\n" |
| 38732 | "void __ovld vstore_half8_rtz(double8 data, size_t offset, half *p);\n" |
| 38733 | "void __ovld vstore_half16_rtz(double16 data, size_t offset, half *p);\n" |
| 38734 | "void __ovld vstore_half2_rtp(double2 data, size_t offset, half *p);\n" |
| 38735 | "void __ovld vstore_half3_rtp(double3 data, size_t offset, half *p);\n" |
| 38736 | "void __ovld vstore_half4_rtp(double4 data, size_t offset, half *p);\n" |
| 38737 | "void __ovld vstore_half8_rtp(double8 data, size_t offset, half *p);\n" |
| 38738 | "void __ovld vstore_half16_rtp(double16 data, size_t offset, half *p);\n" |
| 38739 | "void __ovld vstore_half2_rtn(double2 data, size_t offset, half *p);\n" |
| 38740 | "void __ovld vstore_half3_rtn(double3 data, size_t offset, half *p);\n" |
| 38741 | "void __ovld vstore_half4_rtn(double4 data, size_t offset, half *p);\n" |
| 38742 | "void __ovld vstore_half8_rtn(double8 data, size_t offset, half *p);\n" |
| 38743 | "void __ovld vstore_half16_rtn(double16 data, size_t offset, half *p);\n" |
| 38744 | "#endif //cl_khr_fp64\n" |
| 38745 | "#else\n" |
| 38746 | "void __ovld vstore_half2(float2 data, size_t offset, __global half *p);\n" |
| 38747 | "void __ovld vstore_half3(float3 data, size_t offset, __global half *p);\n" |
| 38748 | "void __ovld vstore_half4(float4 data, size_t offset, __global half *p);\n" |
| 38749 | "void __ovld vstore_half8(float8 data, size_t offset, __global half *p);\n" |
| 38750 | "void __ovld vstore_half16(float16 data, size_t offset, __global half *p);\n" |
| 38751 | "void __ovld vstore_half2_rte(float2 data, size_t offset, __global half *p);\n" |
| 38752 | "void __ovld vstore_half3_rte(float3 data, size_t offset, __global half *p);\n" |
| 38753 | "void __ovld vstore_half4_rte(float4 data, size_t offset, __global half *p);\n" |
| 38754 | "void __ovld vstore_half8_rte(float8 data, size_t offset, __global half *p);\n" |
| 38755 | "void __ovld vstore_half16_rte(float16 data, size_t offset, __global half *p);\n" |
| 38756 | "void __ovld vstore_half2_rtz(float2 data, size_t offset, __global half *p);\n" |
| 38757 | "void __ovld vstore_half3_rtz(float3 data, size_t offset, __global half *p);\n" |
| 38758 | "void __ovld vstore_half4_rtz(float4 data, size_t offset, __global half *p);\n" |
| 38759 | "void __ovld vstore_half8_rtz(float8 data, size_t offset, __global half *p);\n" |
| 38760 | "void __ovld vstore_half16_rtz(float16 data, size_t offset, __global half *p);\n" |
| 38761 | "void __ovld vstore_half2_rtp(float2 data, size_t offset, __global half *p);\n" |
| 38762 | "void __ovld vstore_half3_rtp(float3 data, size_t offset, __global half *p);\n" |
| 38763 | "void __ovld vstore_half4_rtp(float4 data, size_t offset, __global half *p);\n" |
| 38764 | "void __ovld vstore_half8_rtp(float8 data, size_t offset, __global half *p);\n" |
| 38765 | "void __ovld vstore_half16_rtp(float16 data, size_t offset, __global half *p);\n" |
| 38766 | "void __ovld vstore_half2_rtn(float2 data, size_t offset, __global half *p);\n" |
| 38767 | "void __ovld vstore_half3_rtn(float3 data, size_t offset, __global half *p);\n" |
| 38768 | "void __ovld vstore_half4_rtn(float4 data, size_t offset, __global half *p);\n" |
| 38769 | "void __ovld vstore_half8_rtn(float8 data, size_t offset, __global half *p);\n" |
| 38770 | "void __ovld vstore_half16_rtn(float16 data, size_t offset, __global half *p);\n" |
| 38771 | "void __ovld vstore_half2(float2 data, size_t offset, __local half *p);\n" |
| 38772 | "void __ovld vstore_half3(float3 data, size_t offset, __local half *p);\n" |
| 38773 | "void __ovld vstore_half4(float4 data, size_t offset, __local half *p);\n" |
| 38774 | "void __ovld vstore_half8(float8 data, size_t offset, __local half *p);\n" |
| 38775 | "void __ovld vstore_half16(float16 data, size_t offset, __local half *p);\n" |
| 38776 | "void __ovld vstore_half2_rte(float2 data, size_t offset, __local half *p);\n" |
| 38777 | "void __ovld vstore_half3_rte(float3 data, size_t offset, __local half *p);\n" |
| 38778 | "void __ovld vstore_half4_rte(float4 data, size_t offset, __local half *p);\n" |
| 38779 | "void __ovld vstore_half8_rte(float8 data, size_t offset, __local half *p);\n" |
| 38780 | "void __ovld vstore_half16_rte(float16 data, size_t offset, __local half *p);\n" |
| 38781 | "void __ovld vstore_half2_rtz(float2 data, size_t offset, __local half *p);\n" |
| 38782 | "void __ovld vstore_half3_rtz(float3 data, size_t offset, __local half *p);\n" |
| 38783 | "void __ovld vstore_half4_rtz(float4 data, size_t offset, __local half *p);\n" |
| 38784 | "void __ovld vstore_half8_rtz(float8 data, size_t offset, __local half *p);\n" |
| 38785 | "void __ovld vstore_half16_rtz(float16 data, size_t offset, __local half *p);\n" |
| 38786 | "void __ovld vstore_half2_rtp(float2 data, size_t offset, __local half *p);\n" |
| 38787 | "void __ovld vstore_half3_rtp(float3 data, size_t offset, __local half *p);\n" |
| 38788 | "void __ovld vstore_half4_rtp(float4 data, size_t offset, __local half *p);\n" |
| 38789 | "void __ovld vstore_half8_rtp(float8 data, size_t offset, __local half *p);\n" |
| 38790 | "void __ovld vstore_half16_rtp(float16 data, size_t offset, __local half *p);\n" |
| 38791 | "void __ovld vstore_half2_rtn(float2 data, size_t offset, __local half *p);\n" |
| 38792 | "void __ovld vstore_half3_rtn(float3 data, size_t offset, __local half *p);\n" |
| 38793 | "void __ovld vstore_half4_rtn(float4 data, size_t offset, __local half *p);\n" |
| 38794 | "void __ovld vstore_half8_rtn(float8 data, size_t offset, __local half *p);\n" |
| 38795 | "void __ovld vstore_half16_rtn(float16 data, size_t offset, __local half *p);\n" |
| 38796 | "void __ovld vstore_half2(float2 data, size_t offset, __private half *p);\n" |
| 38797 | "void __ovld vstore_half3(float3 data, size_t offset, __private half *p);\n" |
| 38798 | "void __ovld vstore_half4(float4 data, size_t offset, __private half *p);\n" |
| 38799 | "void __ovld vstore_half8(float8 data, size_t offset, __private half *p);\n" |
| 38800 | "void __ovld vstore_half16(float16 data, size_t offset, __private half *p);\n" |
| 38801 | "void __ovld vstore_half2_rte(float2 data, size_t offset, __private half *p);\n" |
| 38802 | "void __ovld vstore_half3_rte(float3 data, size_t offset, __private half *p);\n" |
| 38803 | "void __ovld vstore_half4_rte(float4 data, size_t offset, __private half *p);\n" |
| 38804 | "void __ovld vstore_half8_rte(float8 data, size_t offset, __private half *p);\n" |
| 38805 | "void __ovld vstore_half16_rte(float16 data, size_t offset, __private half *p);\n" |
| 38806 | "void __ovld vstore_half2_rtz(float2 data, size_t offset, __private half *p);\n" |
| 38807 | "void __ovld vstore_half3_rtz(float3 data, size_t offset, __private half *p);\n" |
| 38808 | "void __ovld vstore_half4_rtz(float4 data, size_t offset, __private half *p);\n" |
| 38809 | "void __ovld vstore_half8_rtz(float8 data, size_t offset, __private half *p);\n" |
| 38810 | "void __ovld vstore_half16_rtz(float16 data, size_t offset, __private half *p);\n" |
| 38811 | "void __ovld vstore_half2_rtp(float2 data, size_t offset, __private half *p);\n" |
| 38812 | "void __ovld vstore_half3_rtp(float3 data, size_t offset, __private half *p);\n" |
| 38813 | "void __ovld vstore_half4_rtp(float4 data, size_t offset, __private half *p);\n" |
| 38814 | "void __ovld vstore_half8_rtp(float8 data, size_t offset, __private half *p);\n" |
| 38815 | "void __ovld vstore_half16_rtp(float16 data, size_t offset, __private half *p);\n" |
| 38816 | "void __ovld vstore_half2_rtn(float2 data, size_t offset, __private half *p);\n" |
| 38817 | "void __ovld vstore_half3_rtn(float3 data, size_t offset, __private half *p);\n" |
| 38818 | "void __ovld vstore_half4_rtn(float4 data, size_t offset, __private half *p);\n" |
| 38819 | "void __ovld vstore_half8_rtn(float8 data, size_t offset, __private half *p);\n" |
| 38820 | "void __ovld vstore_half16_rtn(float16 data, size_t offset, __private half *p);\n" |
| 38821 | "#ifdef cl_khr_fp64\n" |
| 38822 | "void __ovld vstore_half2(double2 data, size_t offset, __global half *p);\n" |
| 38823 | "void __ovld vstore_half3(double3 data, size_t offset, __global half *p);\n" |
| 38824 | "void __ovld vstore_half4(double4 data, size_t offset, __global half *p);\n" |
| 38825 | "void __ovld vstore_half8(double8 data, size_t offset, __global half *p);\n" |
| 38826 | "void __ovld vstore_half16(double16 data, size_t offset, __global half *p);\n" |
| 38827 | "void __ovld vstore_half2_rte(double2 data, size_t offset, __global half *p);\n" |
| 38828 | "void __ovld vstore_half3_rte(double3 data, size_t offset, __global half *p);\n" |
| 38829 | "void __ovld vstore_half4_rte(double4 data, size_t offset, __global half *p);\n" |
| 38830 | "void __ovld vstore_half8_rte(double8 data, size_t offset, __global half *p);\n" |
| 38831 | "void __ovld vstore_half16_rte(double16 data, size_t offset, __global half *p);\n" |
| 38832 | "void __ovld vstore_half2_rtz(double2 data, size_t offset, __global half *p);\n" |
| 38833 | "void __ovld vstore_half3_rtz(double3 data, size_t offset, __global half *p);\n" |
| 38834 | "void __ovld vstore_half4_rtz(double4 data, size_t offset, __global half *p);\n" |
| 38835 | "void __ovld vstore_half8_rtz(double8 data, size_t offset, __global half *p);\n" |
| 38836 | "void __ovld vstore_half16_rtz(double16 data, size_t offset, __global half *p);\n" |
| 38837 | "void __ovld vstore_half2_rtp(double2 data, size_t offset, __global half *p);\n" |
| 38838 | "void __ovld vstore_half3_rtp(double3 data, size_t offset, __global half *p);\n" |
| 38839 | "void __ovld vstore_half4_rtp(double4 data, size_t offset, __global half *p);\n" |
| 38840 | "void __ovld vstore_half8_rtp(double8 data, size_t offset, __global half *p);\n" |
| 38841 | "void __ovld vstore_half16_rtp(double16 data, size_t offset, __global half *p);\n" |
| 38842 | "void __ovld vstore_half2_rtn(double2 data, size_t offset, __global half *p);\n" |
| 38843 | "void __ovld vstore_half3_rtn(double3 data, size_t offset, __global half *p);\n" |
| 38844 | "void __ovld vstore_half4_rtn(double4 data, size_t offset, __global half *p);\n" |
| 38845 | "void __ovld vstore_half8_rtn(double8 data, size_t offset, __global half *p);\n" |
| 38846 | "void __ovld vstore_half16_rtn(double16 data, size_t offset, __global half *p);\n" |
| 38847 | "void __ovld vstore_half2(double2 data, size_t offset, __local half *p);\n" |
| 38848 | "void __ovld vstore_half3(double3 data, size_t offset, __local half *p);\n" |
| 38849 | "void __ovld vstore_half4(double4 data, size_t offset, __local half *p);\n" |
| 38850 | "void __ovld vstore_half8(double8 data, size_t offset, __local half *p);\n" |
| 38851 | "void __ovld vstore_half16(double16 data, size_t offset, __local half *p);\n" |
| 38852 | "void __ovld vstore_half2_rte(double2 data, size_t offset, __local half *p);\n" |
| 38853 | "void __ovld vstore_half3_rte(double3 data, size_t offset, __local half *p);\n" |
| 38854 | "void __ovld vstore_half4_rte(double4 data, size_t offset, __local half *p);\n" |
| 38855 | "void __ovld vstore_half8_rte(double8 data, size_t offset, __local half *p);\n" |
| 38856 | "void __ovld vstore_half16_rte(double16 data, size_t offset, __local half *p);\n" |
| 38857 | "void __ovld vstore_half2_rtz(double2 data, size_t offset, __local half *p);\n" |
| 38858 | "void __ovld vstore_half3_rtz(double3 data, size_t offset, __local half *p);\n" |
| 38859 | "void __ovld vstore_half4_rtz(double4 data, size_t offset, __local half *p);\n" |
| 38860 | "void __ovld vstore_half8_rtz(double8 data, size_t offset, __local half *p);\n" |
| 38861 | "void __ovld vstore_half16_rtz(double16 data, size_t offset, __local half *p);\n" |
| 38862 | "void __ovld vstore_half2_rtp(double2 data, size_t offset, __local half *p);\n" |
| 38863 | "void __ovld vstore_half3_rtp(double3 data, size_t offset, __local half *p);\n" |
| 38864 | "void __ovld vstore_half4_rtp(double4 data, size_t offset, __local half *p);\n" |
| 38865 | "void __ovld vstore_half8_rtp(double8 data, size_t offset, __local half *p);\n" |
| 38866 | "void __ovld vstore_half16_rtp(double16 data, size_t offset, __local half *p);\n" |
| 38867 | "void __ovld vstore_half2_rtn(double2 data, size_t offset, __local half *p);\n" |
| 38868 | "void __ovld vstore_half3_rtn(double3 data, size_t offset, __local half *p);\n" |
| 38869 | "void __ovld vstore_half4_rtn(double4 data, size_t offset, __local half *p);\n" |
| 38870 | "void __ovld vstore_half8_rtn(double8 data, size_t offset, __local half *p);\n" |
| 38871 | "void __ovld vstore_half16_rtn(double16 data, size_t offset, __local half *p);\n" |
| 38872 | "void __ovld vstore_half2(double2 data, size_t offset, __private half *p);\n" |
| 38873 | "void __ovld vstore_half3(double3 data, size_t offset, __private half *p);\n" |
| 38874 | "void __ovld vstore_half4(double4 data, size_t offset, __private half *p);\n" |
| 38875 | "void __ovld vstore_half8(double8 data, size_t offset, __private half *p);\n" |
| 38876 | "void __ovld vstore_half16(double16 data, size_t offset, __private half *p);\n" |
| 38877 | "void __ovld vstore_half2_rte(double2 data, size_t offset, __private half *p);\n" |
| 38878 | "void __ovld vstore_half3_rte(double3 data, size_t offset, __private half *p);\n" |
| 38879 | "void __ovld vstore_half4_rte(double4 data, size_t offset, __private half *p);\n" |
| 38880 | "void __ovld vstore_half8_rte(double8 data, size_t offset, __private half *p);\n" |
| 38881 | "void __ovld vstore_half16_rte(double16 data, size_t offset, __private half *p);\n" |
| 38882 | "void __ovld vstore_half2_rtz(double2 data, size_t offset, __private half *p);\n" |
| 38883 | "void __ovld vstore_half3_rtz(double3 data, size_t offset, __private half *p);\n" |
| 38884 | "void __ovld vstore_half4_rtz(double4 data, size_t offset, __private half *p);\n" |
| 38885 | "void __ovld vstore_half8_rtz(double8 data, size_t offset, __private half *p);\n" |
| 38886 | "void __ovld vstore_half16_rtz(double16 data, size_t offset, __private half *p);\n" |
| 38887 | "void __ovld vstore_half2_rtp(double2 data, size_t offset, __private half *p);\n" |
| 38888 | "void __ovld vstore_half3_rtp(double3 data, size_t offset, __private half *p);\n" |
| 38889 | "void __ovld vstore_half4_rtp(double4 data, size_t offset, __private half *p);\n" |
| 38890 | "void __ovld vstore_half8_rtp(double8 data, size_t offset, __private half *p);\n" |
| 38891 | "void __ovld vstore_half16_rtp(double16 data, size_t offset, __private half *p);\n" |
| 38892 | "void __ovld vstore_half2_rtn(double2 data, size_t offset, __private half *p);\n" |
| 38893 | "void __ovld vstore_half3_rtn(double3 data, size_t offset, __private half *p);\n" |
| 38894 | "void __ovld vstore_half4_rtn(double4 data, size_t offset, __private half *p);\n" |
| 38895 | "void __ovld vstore_half8_rtn(double8 data, size_t offset, __private half *p);\n" |
| 38896 | "void __ovld vstore_half16_rtn(double16 data, size_t offset, __private half *p);\n" |
| 38897 | "#endif //cl_khr_fp64\n" |
| 38898 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 38899 | "\n" |
| 38900 | "/**\n" |
| 38901 | " * For n = 1, 2, 4, 8 and 16 read sizeof (halfn)\n" |
| 38902 | " * bytes of data from address (p + (offset * n)).\n" |
| 38903 | " * The data read is interpreted as a halfn value.\n" |
| 38904 | " * The halfn value read is converted to a floatn\n" |
| 38905 | " * value and the floatn value is returned.\n" |
| 38906 | " * The address computed as (p + (offset * n))\n" |
| 38907 | " * must be aligned to sizeof (halfn) bytes.\n" |
| 38908 | " * For n = 3, vloada_half3 reads a half3 from\n" |
| 38909 | " * address (p + (offset * 4)) and returns a float3.\n" |
| 38910 | " * The address computed as (p + (offset * 4))\n" |
| 38911 | " * must be aligned to sizeof (half) * 4 bytes.\n" |
| 38912 | " */\n" |
| 38913 | "float __ovld vloada_half(size_t offset, const __constant half *p);\n" |
| 38914 | "float2 __ovld vloada_half2(size_t offset, const __constant half *p);\n" |
| 38915 | "float3 __ovld vloada_half3(size_t offset, const __constant half *p);\n" |
| 38916 | "float4 __ovld vloada_half4(size_t offset, const __constant half *p);\n" |
| 38917 | "float8 __ovld vloada_half8(size_t offset, const __constant half *p);\n" |
| 38918 | "float16 __ovld vloada_half16(size_t offset, const __constant half *p);\n" |
| 38919 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 38920 | "float __ovld vloada_half(size_t offset, const half *p);\n" |
| 38921 | "float2 __ovld vloada_half2(size_t offset, const half *p);\n" |
| 38922 | "float3 __ovld vloada_half3(size_t offset, const half *p);\n" |
| 38923 | "float4 __ovld vloada_half4(size_t offset, const half *p);\n" |
| 38924 | "float8 __ovld vloada_half8(size_t offset, const half *p);\n" |
| 38925 | "float16 __ovld vloada_half16(size_t offset, const half *p);\n" |
| 38926 | "#else\n" |
| 38927 | "float __ovld vloada_half(size_t offset, const __global half *p);\n" |
| 38928 | "float2 __ovld vloada_half2(size_t offset, const __global half *p);\n" |
| 38929 | "float3 __ovld vloada_half3(size_t offset, const __global half *p);\n" |
| 38930 | "float4 __ovld vloada_half4(size_t offset, const __global half *p);\n" |
| 38931 | "float8 __ovld vloada_half8(size_t offset, const __global half *p);\n" |
| 38932 | "float16 __ovld vloada_half16(size_t offset, const __global half *p);\n" |
| 38933 | "float __ovld vloada_half(size_t offset, const __local half *p);\n" |
| 38934 | "float2 __ovld vloada_half2(size_t offset, const __local half *p);\n" |
| 38935 | "float3 __ovld vloada_half3(size_t offset, const __local half *p);\n" |
| 38936 | "float4 __ovld vloada_half4(size_t offset, const __local half *p);\n" |
| 38937 | "float8 __ovld vloada_half8(size_t offset, const __local half *p);\n" |
| 38938 | "float16 __ovld vloada_half16(size_t offset, const __local half *p);\n" |
| 38939 | "float __ovld vloada_half(size_t offset, const __private half *p);\n" |
| 38940 | "float2 __ovld vloada_half2(size_t offset, const __private half *p);\n" |
| 38941 | "float3 __ovld vloada_half3(size_t offset, const __private half *p);\n" |
| 38942 | "float4 __ovld vloada_half4(size_t offset, const __private half *p);\n" |
| 38943 | "float8 __ovld vloada_half8(size_t offset, const __private half *p);\n" |
| 38944 | "float16 __ovld vloada_half16(size_t offset, const __private half *p);\n" |
| 38945 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 38946 | "\n" |
| 38947 | "/**\n" |
| 38948 | " * The floatn value given by data is converted to\n" |
| 38949 | " * a halfn value using the appropriate rounding\n" |
| 38950 | " * mode.\n" |
| 38951 | " * For n = 1, 2, 4, 8 and 16, the halfn value is\n" |
| 38952 | " * written to the address computed as (p + (offset\n" |
| 38953 | " * * n)). The address computed as (p + (offset *\n" |
| 38954 | " * n)) must be aligned to sizeof (halfn) bytes.\n" |
| 38955 | " * For n = 3, the half3 value is written to the\n" |
| 38956 | " * address computed as (p + (offset * 4)). The\n" |
| 38957 | " * address computed as (p + (offset * 4)) must be\n" |
| 38958 | " * aligned to sizeof (half) * 4 bytes.\n" |
| 38959 | " * vstorea_halfn uses the current rounding\n" |
| 38960 | " * mode. The default current rounding mode is\n" |
| 38961 | " * round to nearest even.\n" |
| 38962 | " */\n" |
| 38963 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 38964 | "void __ovld vstorea_half(float data, size_t offset, half *p);\n" |
| 38965 | "void __ovld vstorea_half2(float2 data, size_t offset, half *p);\n" |
| 38966 | "void __ovld vstorea_half3(float3 data, size_t offset, half *p);\n" |
| 38967 | "void __ovld vstorea_half4(float4 data, size_t offset, half *p);\n" |
| 38968 | "void __ovld vstorea_half8(float8 data, size_t offset, half *p);\n" |
| 38969 | "void __ovld vstorea_half16(float16 data, size_t offset, half *p);\n" |
| 38970 | "\n" |
| 38971 | "void __ovld vstorea_half_rte(float data, size_t offset, half *p);\n" |
| 38972 | "void __ovld vstorea_half2_rte(float2 data, size_t offset, half *p);\n" |
| 38973 | "void __ovld vstorea_half3_rte(float3 data, size_t offset, half *p);\n" |
| 38974 | "void __ovld vstorea_half4_rte(float4 data, size_t offset, half *p);\n" |
| 38975 | "void __ovld vstorea_half8_rte(float8 data, size_t offset, half *p);\n" |
| 38976 | "void __ovld vstorea_half16_rte(float16 data, size_t offset, half *p);\n" |
| 38977 | "\n" |
| 38978 | "void __ovld vstorea_half_rtz(float data, size_t offset, half *p);\n" |
| 38979 | "void __ovld vstorea_half2_rtz(float2 data, size_t offset, half *p);\n" |
| 38980 | "void __ovld vstorea_half3_rtz(float3 data, size_t offset, half *p);\n" |
| 38981 | "void __ovld vstorea_half4_rtz(float4 data, size_t offset, half *p);\n" |
| 38982 | "void __ovld vstorea_half8_rtz(float8 data, size_t offset, half *p);\n" |
| 38983 | "void __ovld vstorea_half16_rtz(float16 data, size_t offset, half *p);\n" |
| 38984 | "\n" |
| 38985 | "void __ovld vstorea_half_rtp(float data, size_t offset, half *p);\n" |
| 38986 | "void __ovld vstorea_half2_rtp(float2 data, size_t offset, half *p);\n" |
| 38987 | "void __ovld vstorea_half3_rtp(float3 data, size_t offset, half *p);\n" |
| 38988 | "void __ovld vstorea_half4_rtp(float4 data, size_t offset, half *p);\n" |
| 38989 | "void __ovld vstorea_half8_rtp(float8 data, size_t offset, half *p);\n" |
| 38990 | "void __ovld vstorea_half16_rtp(float16 data, size_t offset, half *p);\n" |
| 38991 | "\n" |
| 38992 | "void __ovld vstorea_half_rtn(float data, size_t offset, half *p);\n" |
| 38993 | "void __ovld vstorea_half2_rtn(float2 data, size_t offset, half *p);\n" |
| 38994 | "void __ovld vstorea_half3_rtn(float3 data, size_t offset, half *p);\n" |
| 38995 | "void __ovld vstorea_half4_rtn(float4 data, size_t offset, half *p);\n" |
| 38996 | "void __ovld vstorea_half8_rtn(float8 data, size_t offset, half *p);\n" |
| 38997 | "void __ovld vstorea_half16_rtn(float16 data, size_t offset, half *p);\n" |
| 38998 | "\n" |
| 38999 | "#ifdef cl_khr_fp64\n" |
| 39000 | "void __ovld vstorea_half(double data, size_t offset, half *p);\n" |
| 39001 | "void __ovld vstorea_half2(double2 data, size_t offset, half *p);\n" |
| 39002 | "void __ovld vstorea_half3(double3 data, size_t offset, half *p);\n" |
| 39003 | "void __ovld vstorea_half4(double4 data, size_t offset, half *p);\n" |
| 39004 | "void __ovld vstorea_half8(double8 data, size_t offset, half *p);\n" |
| 39005 | "void __ovld vstorea_half16(double16 data, size_t offset, half *p);\n" |
| 39006 | "\n" |
| 39007 | "void __ovld vstorea_half_rte(double data, size_t offset, half *p);\n" |
| 39008 | "void __ovld vstorea_half2_rte(double2 data, size_t offset, half *p);\n" |
| 39009 | "void __ovld vstorea_half3_rte(double3 data, size_t offset, half *p);\n" |
| 39010 | "void __ovld vstorea_half4_rte(double4 data, size_t offset, half *p);\n" |
| 39011 | "void __ovld vstorea_half8_rte(double8 data, size_t offset, half *p);\n" |
| 39012 | "void __ovld vstorea_half16_rte(double16 data, size_t offset, half *p);\n" |
| 39013 | "\n" |
| 39014 | "void __ovld vstorea_half_rtz(double data, size_t offset, half *p);\n" |
| 39015 | "void __ovld vstorea_half2_rtz(double2 data, size_t offset, half *p);\n" |
| 39016 | "void __ovld vstorea_half3_rtz(double3 data, size_t offset, half *p);\n" |
| 39017 | "void __ovld vstorea_half4_rtz(double4 data, size_t offset, half *p);\n" |
| 39018 | "void __ovld vstorea_half8_rtz(double8 data, size_t offset, half *p);\n" |
| 39019 | "void __ovld vstorea_half16_rtz(double16 data, size_t offset, half *p);\n" |
| 39020 | "\n" |
| 39021 | "void __ovld vstorea_half_rtp(double data, size_t offset, half *p);\n" |
| 39022 | "void __ovld vstorea_half2_rtp(double2 data, size_t offset, half *p);\n" |
| 39023 | "void __ovld vstorea_half3_rtp(double3 data, size_t offset, half *p);\n" |
| 39024 | "void __ovld vstorea_half4_rtp(double4 data, size_t offset, half *p);\n" |
| 39025 | "void __ovld vstorea_half8_rtp(double8 data, size_t offset, half *p);\n" |
| 39026 | "void __ovld vstorea_half16_rtp(double16 data, size_t offset, half *p);\n" |
| 39027 | "\n" |
| 39028 | "void __ovld vstorea_half_rtn(double data, size_t offset, half *p);\n" |
| 39029 | "void __ovld vstorea_half2_rtn(double2 data, size_t offset, half *p);\n" |
| 39030 | "void __ovld vstorea_half3_rtn(double3 data, size_t offset, half *p);\n" |
| 39031 | "void __ovld vstorea_half4_rtn(double4 data, size_t offset, half *p);\n" |
| 39032 | "void __ovld vstorea_half8_rtn(double8 data, size_t offset, half *p);\n" |
| 39033 | "void __ovld vstorea_half16_rtn(double16 data, size_t offset, half *p);\n" |
| 39034 | "#endif //cl_khr_fp64\n" |
| 39035 | "\n" |
| 39036 | "#else\n" |
| 39037 | "void __ovld vstorea_half(float data, size_t offset, __global half *p);\n" |
| 39038 | "void __ovld vstorea_half2(float2 data, size_t offset, __global half *p);\n" |
| 39039 | "void __ovld vstorea_half3(float3 data, size_t offset, __global half *p);\n" |
| 39040 | "void __ovld vstorea_half4(float4 data, size_t offset, __global half *p);\n" |
| 39041 | "void __ovld vstorea_half8(float8 data, size_t offset, __global half *p);\n" |
| 39042 | "void __ovld vstorea_half16(float16 data, size_t offset, __global half *p);\n" |
| 39043 | "\n" |
| 39044 | "void __ovld vstorea_half_rte(float data, size_t offset, __global half *p);\n" |
| 39045 | "void __ovld vstorea_half2_rte(float2 data, size_t offset, __global half *p);\n" |
| 39046 | "void __ovld vstorea_half3_rte(float3 data, size_t offset, __global half *p);\n" |
| 39047 | "void __ovld vstorea_half4_rte(float4 data, size_t offset, __global half *p);\n" |
| 39048 | "void __ovld vstorea_half8_rte(float8 data, size_t offset, __global half *p);\n" |
| 39049 | "void __ovld vstorea_half16_rte(float16 data, size_t offset, __global half *p);\n" |
| 39050 | "\n" |
| 39051 | "void __ovld vstorea_half_rtz(float data, size_t offset, __global half *p);\n" |
| 39052 | "void __ovld vstorea_half2_rtz(float2 data, size_t offset, __global half *p);\n" |
| 39053 | "void __ovld vstorea_half3_rtz(float3 data, size_t offset, __global half *p);\n" |
| 39054 | "void __ovld vstorea_half4_rtz(float4 data, size_t offset, __global half *p);\n" |
| 39055 | "void __ovld vstorea_half8_rtz(float8 data, size_t offset, __global half *p);\n" |
| 39056 | "void __ovld vstorea_half16_rtz(float16 data, size_t offset, __global half *p);\n" |
| 39057 | "\n" |
| 39058 | "void __ovld vstorea_half_rtp(float data, size_t offset, __global half *p);\n" |
| 39059 | "void __ovld vstorea_half2_rtp(float2 data, size_t offset, __global half *p);\n" |
| 39060 | "void __ovld vstorea_half3_rtp(float3 data, size_t offset, __global half *p);\n" |
| 39061 | "void __ovld vstorea_half4_rtp(float4 data, size_t offset, __global half *p);\n" |
| 39062 | "void __ovld vstorea_half8_rtp(float8 data, size_t offset, __global half *p);\n" |
| 39063 | "void __ovld vstorea_half16_rtp(float16 data, size_t offset, __global half *p);\n" |
| 39064 | "\n" |
| 39065 | "void __ovld vstorea_half_rtn(float data, size_t offset, __global half *p);\n" |
| 39066 | "void __ovld vstorea_half2_rtn(float2 data, size_t offset, __global half *p);\n" |
| 39067 | "void __ovld vstorea_half3_rtn(float3 data, size_t offset, __global half *p);\n" |
| 39068 | "void __ovld vstorea_half4_rtn(float4 data, size_t offset, __global half *p);\n" |
| 39069 | "void __ovld vstorea_half8_rtn(float8 data, size_t offset, __global half *p);\n" |
| 39070 | "void __ovld vstorea_half16_rtn(float16 data, size_t offset, __global half *p);\n" |
| 39071 | "\n" |
| 39072 | "void __ovld vstorea_half(float data, size_t offset, __local half *p);\n" |
| 39073 | "void __ovld vstorea_half2(float2 data, size_t offset, __local half *p);\n" |
| 39074 | "void __ovld vstorea_half3(float3 data, size_t offset, __local half *p);\n" |
| 39075 | "void __ovld vstorea_half4(float4 data, size_t offset, __local half *p);\n" |
| 39076 | "void __ovld vstorea_half8(float8 data, size_t offset, __local half *p);\n" |
| 39077 | "void __ovld vstorea_half16(float16 data, size_t offset, __local half *p);\n" |
| 39078 | "\n" |
| 39079 | "void __ovld vstorea_half_rte(float data, size_t offset, __local half *p);\n" |
| 39080 | "void __ovld vstorea_half2_rte(float2 data, size_t offset, __local half *p);\n" |
| 39081 | "void __ovld vstorea_half3_rte(float3 data, size_t offset, __local half *p);\n" |
| 39082 | "void __ovld vstorea_half4_rte(float4 data, size_t offset, __local half *p);\n" |
| 39083 | "void __ovld vstorea_half8_rte(float8 data, size_t offset, __local half *p);\n" |
| 39084 | "void __ovld vstorea_half16_rte(float16 data, size_t offset, __local half *p);\n" |
| 39085 | "\n" |
| 39086 | "void __ovld vstorea_half_rtz(float data, size_t offset, __local half *p);\n" |
| 39087 | "void __ovld vstorea_half2_rtz(float2 data, size_t offset, __local half *p);\n" |
| 39088 | "void __ovld vstorea_half3_rtz(float3 data, size_t offset, __local half *p);\n" |
| 39089 | "void __ovld vstorea_half4_rtz(float4 data, size_t offset, __local half *p);\n" |
| 39090 | "void __ovld vstorea_half8_rtz(float8 data, size_t offset, __local half *p);\n" |
| 39091 | "void __ovld vstorea_half16_rtz(float16 data, size_t offset, __local half *p);\n" |
| 39092 | "\n" |
| 39093 | "void __ovld vstorea_half_rtp(float data, size_t offset, __local half *p);\n" |
| 39094 | "void __ovld vstorea_half2_rtp(float2 data, size_t offset, __local half *p);\n" |
| 39095 | "void __ovld vstorea_half3_rtp(float3 data, size_t offset, __local half *p);\n" |
| 39096 | "void __ovld vstorea_half4_rtp(float4 data, size_t offset, __local half *p);\n" |
| 39097 | "void __ovld vstorea_half8_rtp(float8 data, size_t offset, __local half *p);\n" |
| 39098 | "void __ovld vstorea_half16_rtp(float16 data, size_t offset, __local half *p);\n" |
| 39099 | "\n" |
| 39100 | "void __ovld vstorea_half_rtn(float data, size_t offset, __local half *p);\n" |
| 39101 | "void __ovld vstorea_half2_rtn(float2 data, size_t offset, __local half *p);\n" |
| 39102 | "void __ovld vstorea_half3_rtn(float3 data, size_t offset, __local half *p);\n" |
| 39103 | "void __ovld vstorea_half4_rtn(float4 data, size_t offset, __local half *p);\n" |
| 39104 | "void __ovld vstorea_half8_rtn(float8 data, size_t offset, __local half *p);\n" |
| 39105 | "void __ovld vstorea_half16_rtn(float16 data, size_t offset, __local half *p);\n" |
| 39106 | "\n" |
| 39107 | "void __ovld vstorea_half(float data, size_t offset, __private half *p);\n" |
| 39108 | "void __ovld vstorea_half2(float2 data, size_t offset, __private half *p);\n" |
| 39109 | "void __ovld vstorea_half3(float3 data, size_t offset, __private half *p);\n" |
| 39110 | "void __ovld vstorea_half4(float4 data, size_t offset, __private half *p);\n" |
| 39111 | "void __ovld vstorea_half8(float8 data, size_t offset, __private half *p);\n" |
| 39112 | "void __ovld vstorea_half16(float16 data, size_t offset, __private half *p);\n" |
| 39113 | "\n" |
| 39114 | "void __ovld vstorea_half_rte(float data, size_t offset, __private half *p);\n" |
| 39115 | "void __ovld vstorea_half2_rte(float2 data, size_t offset, __private half *p);\n" |
| 39116 | "void __ovld vstorea_half3_rte(float3 data, size_t offset, __private half *p);\n" |
| 39117 | "void __ovld vstorea_half4_rte(float4 data, size_t offset, __private half *p);\n" |
| 39118 | "void __ovld vstorea_half8_rte(float8 data, size_t offset, __private half *p);\n" |
| 39119 | "void __ovld vstorea_half16_rte(float16 data, size_t offset, __private half *p);\n" |
| 39120 | "\n" |
| 39121 | "void __ovld vstorea_half_rtz(float data, size_t offset, __private half *p);\n" |
| 39122 | "void __ovld vstorea_half2_rtz(float2 data, size_t offset, __private half *p);\n" |
| 39123 | "void __ovld vstorea_half3_rtz(float3 data, size_t offset, __private half *p);\n" |
| 39124 | "void __ovld vstorea_half4_rtz(float4 data, size_t offset, __private half *p);\n" |
| 39125 | "void __ovld vstorea_half8_rtz(float8 data, size_t offset, __private half *p);\n" |
| 39126 | "void __ovld vstorea_half16_rtz(float16 data, size_t offset, __private half *p);\n" |
| 39127 | "\n" |
| 39128 | "void __ovld vstorea_half_rtp(float data, size_t offset, __private half *p);\n" |
| 39129 | "void __ovld vstorea_half2_rtp(float2 data, size_t offset, __private half *p);\n" |
| 39130 | "void __ovld vstorea_half3_rtp(float3 data, size_t offset, __private half *p);\n" |
| 39131 | "void __ovld vstorea_half4_rtp(float4 data, size_t offset, __private half *p);\n" |
| 39132 | "void __ovld vstorea_half8_rtp(float8 data, size_t offset, __private half *p);\n" |
| 39133 | "void __ovld vstorea_half16_rtp(float16 data, size_t offset, __private half *p);\n" |
| 39134 | "\n" |
| 39135 | "void __ovld vstorea_half_rtn(float data, size_t offset, __private half *p);\n" |
| 39136 | "void __ovld vstorea_half2_rtn(float2 data, size_t offset, __private half *p);\n" |
| 39137 | "void __ovld vstorea_half3_rtn(float3 data, size_t offset, __private half *p);\n" |
| 39138 | "void __ovld vstorea_half4_rtn(float4 data, size_t offset, __private half *p);\n" |
| 39139 | "void __ovld vstorea_half8_rtn(float8 data, size_t offset, __private half *p);\n" |
| 39140 | "void __ovld vstorea_half16_rtn(float16 data, size_t offset, __private half *p);\n" |
| 39141 | "\n" |
| 39142 | "#ifdef cl_khr_fp64\n" |
| 39143 | "void __ovld vstorea_half(double data, size_t offset, __global half *p);\n" |
| 39144 | "void __ovld vstorea_half2(double2 data, size_t offset, __global half *p);\n" |
| 39145 | "void __ovld vstorea_half3(double3 data, size_t offset, __global half *p);\n" |
| 39146 | "void __ovld vstorea_half4(double4 data, size_t offset, __global half *p);\n" |
| 39147 | "void __ovld vstorea_half8(double8 data, size_t offset, __global half *p);\n" |
| 39148 | "void __ovld vstorea_half16(double16 data, size_t offset, __global half *p);\n" |
| 39149 | "\n" |
| 39150 | "void __ovld vstorea_half_rte(double data, size_t offset, __global half *p);\n" |
| 39151 | "void __ovld vstorea_half2_rte(double2 data, size_t offset, __global half *p);\n" |
| 39152 | "void __ovld vstorea_half3_rte(double3 data, size_t offset, __global half *p);\n" |
| 39153 | "void __ovld vstorea_half4_rte(double4 data, size_t offset, __global half *p);\n" |
| 39154 | "void __ovld vstorea_half8_rte(double8 data, size_t offset, __global half *p);\n" |
| 39155 | "void __ovld vstorea_half16_rte(double16 data, size_t offset, __global half *p);\n" |
| 39156 | "\n" |
| 39157 | "void __ovld vstorea_half_rtz(double data, size_t offset, __global half *p);\n" |
| 39158 | "void __ovld vstorea_half2_rtz(double2 data, size_t offset, __global half *p);\n" |
| 39159 | "void __ovld vstorea_half3_rtz(double3 data, size_t offset, __global half *p);\n" |
| 39160 | "void __ovld vstorea_half4_rtz(double4 data, size_t offset, __global half *p);\n" |
| 39161 | "void __ovld vstorea_half8_rtz(double8 data, size_t offset, __global half *p);\n" |
| 39162 | "void __ovld vstorea_half16_rtz(double16 data, size_t offset, __global half *p);\n" |
| 39163 | "\n" |
| 39164 | "void __ovld vstorea_half_rtp(double data, size_t offset, __global half *p);\n" |
| 39165 | "void __ovld vstorea_half2_rtp(double2 data, size_t offset, __global half *p);\n" |
| 39166 | "void __ovld vstorea_half3_rtp(double3 data, size_t offset, __global half *p);\n" |
| 39167 | "void __ovld vstorea_half4_rtp(double4 data, size_t offset, __global half *p);\n" |
| 39168 | "void __ovld vstorea_half8_rtp(double8 data, size_t offset, __global half *p);\n" |
| 39169 | "void __ovld vstorea_half16_rtp(double16 data, size_t offset, __global half *p);\n" |
| 39170 | "\n" |
| 39171 | "void __ovld vstorea_half_rtn(double data, size_t offset, __global half *p);\n" |
| 39172 | "void __ovld vstorea_half2_rtn(double2 data, size_t offset, __global half *p);\n" |
| 39173 | "void __ovld vstorea_half3_rtn(double3 data, size_t offset, __global half *p);\n" |
| 39174 | "void __ovld vstorea_half4_rtn(double4 data, size_t offset, __global half *p);\n" |
| 39175 | "void __ovld vstorea_half8_rtn(double8 data, size_t offset, __global half *p);\n" |
| 39176 | "void __ovld vstorea_half16_rtn(double16 data, size_t offset, __global half *p);\n" |
| 39177 | "\n" |
| 39178 | "void __ovld vstorea_half(double data, size_t offset, __local half *p);\n" |
| 39179 | "void __ovld vstorea_half2(double2 data, size_t offset, __local half *p);\n" |
| 39180 | "void __ovld vstorea_half3(double3 data, size_t offset, __local half *p);\n" |
| 39181 | "void __ovld vstorea_half4(double4 data, size_t offset, __local half *p);\n" |
| 39182 | "void __ovld vstorea_half8(double8 data, size_t offset, __local half *p);\n" |
| 39183 | "void __ovld vstorea_half16(double16 data, size_t offset, __local half *p);\n" |
| 39184 | "\n" |
| 39185 | "void __ovld vstorea_half_rte(double data, size_t offset, __local half *p);\n" |
| 39186 | "void __ovld vstorea_half2_rte(double2 data, size_t offset, __local half *p);\n" |
| 39187 | "void __ovld vstorea_half3_rte(double3 data, size_t offset, __local half *p);\n" |
| 39188 | "void __ovld vstorea_half4_rte(double4 data, size_t offset, __local half *p);\n" |
| 39189 | "void __ovld vstorea_half8_rte(double8 data, size_t offset, __local half *p);\n" |
| 39190 | "void __ovld vstorea_half16_rte(double16 data, size_t offset, __local half *p);\n" |
| 39191 | "\n" |
| 39192 | "void __ovld vstorea_half_rtz(double data, size_t offset, __local half *p);\n" |
| 39193 | "void __ovld vstorea_half2_rtz(double2 data, size_t offset, __local half *p);\n" |
| 39194 | "void __ovld vstorea_half3_rtz(double3 data, size_t offset, __local half *p);\n" |
| 39195 | "void __ovld vstorea_half4_rtz(double4 data, size_t offset, __local half *p);\n" |
| 39196 | "void __ovld vstorea_half8_rtz(double8 data, size_t offset, __local half *p);\n" |
| 39197 | "void __ovld vstorea_half16_rtz(double16 data, size_t offset, __local half *p);\n" |
| 39198 | "\n" |
| 39199 | "void __ovld vstorea_half_rtp(double data, size_t offset, __local half *p);\n" |
| 39200 | "void __ovld vstorea_half2_rtp(double2 data, size_t offset, __local half *p);\n" |
| 39201 | "void __ovld vstorea_half3_rtp(double3 data, size_t offset, __local half *p);\n" |
| 39202 | "void __ovld vstorea_half4_rtp(double4 data, size_t offset, __local half *p);\n" |
| 39203 | "void __ovld vstorea_half8_rtp(double8 data, size_t offset, __local half *p);\n" |
| 39204 | "void __ovld vstorea_half16_rtp(double16 data, size_t offset, __local half *p);\n" |
| 39205 | "\n" |
| 39206 | "void __ovld vstorea_half_rtn(double data, size_t offset, __local half *p);\n" |
| 39207 | "void __ovld vstorea_half2_rtn(double2 data, size_t offset, __local half *p);\n" |
| 39208 | "void __ovld vstorea_half3_rtn(double3 data, size_t offset, __local half *p);\n" |
| 39209 | "void __ovld vstorea_half4_rtn(double4 data, size_t offset, __local half *p);\n" |
| 39210 | "void __ovld vstorea_half8_rtn(double8 data, size_t offset, __local half *p);\n" |
| 39211 | "void __ovld vstorea_half16_rtn(double16 data, size_t offset, __local half *p);\n" |
| 39212 | "\n" |
| 39213 | "void __ovld vstorea_half(double data, size_t offset, __private half *p);\n" |
| 39214 | "void __ovld vstorea_half2(double2 data, size_t offset, __private half *p);\n" |
| 39215 | "void __ovld vstorea_half3(double3 data, size_t offset, __private half *p);\n" |
| 39216 | "void __ovld vstorea_half4(double4 data, size_t offset, __private half *p);\n" |
| 39217 | "void __ovld vstorea_half8(double8 data, size_t offset, __private half *p);\n" |
| 39218 | "void __ovld vstorea_half16(double16 data, size_t offset, __private half *p);\n" |
| 39219 | "\n" |
| 39220 | "void __ovld vstorea_half_rte(double data, size_t offset, __private half *p);\n" |
| 39221 | "void __ovld vstorea_half2_rte(double2 data, size_t offset, __private half *p);\n" |
| 39222 | "void __ovld vstorea_half3_rte(double3 data, size_t offset, __private half *p);\n" |
| 39223 | "void __ovld vstorea_half4_rte(double4 data, size_t offset, __private half *p);\n" |
| 39224 | "void __ovld vstorea_half8_rte(double8 data, size_t offset, __private half *p);\n" |
| 39225 | "void __ovld vstorea_half16_rte(double16 data, size_t offset, __private half *p);\n" |
| 39226 | "\n" |
| 39227 | "void __ovld vstorea_half_rtz(double data, size_t offset, __private half *p);\n" |
| 39228 | "void __ovld vstorea_half2_rtz(double2 data, size_t offset, __private half *p);\n" |
| 39229 | "void __ovld vstorea_half3_rtz(double3 data, size_t offset, __private half *p);\n" |
| 39230 | "void __ovld vstorea_half4_rtz(double4 data, size_t offset, __private half *p);\n" |
| 39231 | "void __ovld vstorea_half8_rtz(double8 data, size_t offset, __private half *p);\n" |
| 39232 | "void __ovld vstorea_half16_rtz(double16 data, size_t offset, __private half *p);\n" |
| 39233 | "\n" |
| 39234 | "void __ovld vstorea_half_rtp(double data, size_t offset, __private half *p);\n" |
| 39235 | "void __ovld vstorea_half2_rtp(double2 data, size_t offset, __private half *p);\n" |
| 39236 | "void __ovld vstorea_half3_rtp(double3 data, size_t offset, __private half *p);\n" |
| 39237 | "void __ovld vstorea_half4_rtp(double4 data, size_t offset, __private half *p);\n" |
| 39238 | "void __ovld vstorea_half8_rtp(double8 data, size_t offset, __private half *p);\n" |
| 39239 | "void __ovld vstorea_half16_rtp(double16 data, size_t offset, __private half *p);\n" |
| 39240 | "\n" |
| 39241 | "void __ovld vstorea_half_rtn(double data, size_t offset, __private half *p);\n" |
| 39242 | "void __ovld vstorea_half2_rtn(double2 data,size_t offset, __private half *p);\n" |
| 39243 | "void __ovld vstorea_half3_rtn(double3 data,size_t offset, __private half *p);\n" |
| 39244 | "void __ovld vstorea_half4_rtn(double4 data,size_t offset, __private half *p);\n" |
| 39245 | "void __ovld vstorea_half8_rtn(double8 data,size_t offset, __private half *p);\n" |
| 39246 | "void __ovld vstorea_half16_rtn(double16 data,size_t offset, __private half *p);\n" |
| 39247 | "#endif //cl_khr_fp64\n" |
| 39248 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 39249 | "\n" |
| 39250 | "// OpenCL v1.1 s6.11.8, v1.2 s6.12.8, v2.0 s6.13.8 - Synchronization Functions\n" |
| 39251 | "\n" |
| 39252 | "// Flag type and values for barrier, mem_fence, read_mem_fence, write_mem_fence\n" |
| 39253 | "typedef uint cl_mem_fence_flags;\n" |
| 39254 | "\n" |
| 39255 | "/**\n" |
| 39256 | " * Queue a memory fence to ensure correct\n" |
| 39257 | " * ordering of memory operations to local memory\n" |
| 39258 | " */\n" |
| 39259 | "#define CLK_LOCAL_MEM_FENCE 0x01\n" |
| 39260 | "\n" |
| 39261 | "/**\n" |
| 39262 | " * Queue a memory fence to ensure correct\n" |
| 39263 | " * ordering of memory operations to global memory\n" |
| 39264 | " */\n" |
| 39265 | "#define CLK_GLOBAL_MEM_FENCE 0x02\n" |
| 39266 | "\n" |
| 39267 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 39268 | "/**\n" |
| 39269 | " * Queue a memory fence to ensure correct ordering of memory\n" |
| 39270 | " * operations between work-items of a work-group to\n" |
| 39271 | " * image memory.\n" |
| 39272 | " */\n" |
| 39273 | "#define CLK_IMAGE_MEM_FENCE 0x04\n" |
| 39274 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 39275 | "\n" |
| 39276 | "/**\n" |
| 39277 | " * All work-items in a work-group executing the kernel\n" |
| 39278 | " * on a processor must execute this function before any\n" |
| 39279 | " * are allowed to continue execution beyond the barrier.\n" |
| 39280 | " * This function must be encountered by all work-items in\n" |
| 39281 | " * a work-group executing the kernel.\n" |
| 39282 | " * If barrier is inside a conditional statement, then all\n" |
| 39283 | " * work-items must enter the conditional if any work-item\n" |
| 39284 | " * enters the conditional statement and executes the\n" |
| 39285 | " * barrier.\n" |
| 39286 | " * If barrer is inside a loop, all work-items must execute\n" |
| 39287 | " * the barrier for each iteration of the loop before any are\n" |
| 39288 | " * allowed to continue execution beyond the barrier.\n" |
| 39289 | " * The barrier function also queues a memory fence\n" |
| 39290 | " * (reads and writes) to ensure correct ordering of\n" |
| 39291 | " * memory operations to local or global memory.\n" |
| 39292 | " * The flags argument specifies the memory address space\n" |
| 39293 | " * and can be set to a combination of the following literal\n" |
| 39294 | " * values.\n" |
| 39295 | " * CLK_LOCAL_MEM_FENCE - The barrier function\n" |
| 39296 | " * will either flush any variables stored in local memory\n" |
| 39297 | " * or queue a memory fence to ensure correct ordering of\n" |
| 39298 | " * memory operations to local memory.\n" |
| 39299 | " * CLK_GLOBAL_MEM_FENCE - The barrier function\n" |
| 39300 | " * will queue a memory fence to ensure correct ordering\n" |
| 39301 | " * of memory operations to global memory. This can be\n" |
| 39302 | " * useful when work-items, for example, write to buffer or\n" |
| 39303 | " * image objects and then want to read the updated data.\n" |
| 39304 | " */\n" |
| 39305 | "\n" |
| 39306 | "void __ovld __conv barrier(cl_mem_fence_flags flags);\n" |
| 39307 | "\n" |
| 39308 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 39309 | "\n" |
| 39310 | "typedef enum memory_scope {\n" |
| 39311 | " memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,\n" |
| 39312 | " memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,\n" |
| 39313 | " memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,\n" |
| 39314 | " memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,\n" |
| 39315 | "#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)\n" |
| 39316 | " memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP\n" |
| 39317 | "#endif\n" |
| 39318 | "} memory_scope;\n" |
| 39319 | "\n" |
| 39320 | "void __ovld __conv work_group_barrier(cl_mem_fence_flags flags, memory_scope scope);\n" |
| 39321 | "void __ovld __conv work_group_barrier(cl_mem_fence_flags flags);\n" |
| 39322 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 39323 | "\n" |
| 39324 | "// OpenCL v1.1 s6.11.9, v1.2 s6.12.9 - Explicit Memory Fence Functions\n" |
| 39325 | "\n" |
| 39326 | "/**\n" |
| 39327 | " * Orders loads and stores of a work-item\n" |
| 39328 | " * executing a kernel. This means that loads\n" |
| 39329 | " * and stores preceding the mem_fence will\n" |
| 39330 | " * be committed to memory before any loads\n" |
| 39331 | " * and stores following the mem_fence.\n" |
| 39332 | " * The flags argument specifies the memory\n" |
| 39333 | " * address space and can be set to a\n" |
| 39334 | " * combination of the following literal\n" |
| 39335 | " * values:\n" |
| 39336 | " * CLK_LOCAL_MEM_FENCE\n" |
| 39337 | " * CLK_GLOBAL_MEM_FENCE.\n" |
| 39338 | " */\n" |
| 39339 | "void __ovld mem_fence(cl_mem_fence_flags flags);\n" |
| 39340 | "\n" |
| 39341 | "/**\n" |
| 39342 | " * Read memory barrier that orders only\n" |
| 39343 | " * loads.\n" |
| 39344 | " * The flags argument specifies the memory\n" |
| 39345 | " * address space and can be set to a\n" |
| 39346 | " * combination of the following literal\n" |
| 39347 | " * values:\n" |
| 39348 | " * CLK_LOCAL_MEM_FENCE\n" |
| 39349 | " * CLK_GLOBAL_MEM_FENCE.\n" |
| 39350 | " */\n" |
| 39351 | "void __ovld read_mem_fence(cl_mem_fence_flags flags);\n" |
| 39352 | "\n" |
| 39353 | "/**\n" |
| 39354 | " * Write memory barrier that orders only\n" |
| 39355 | " * stores.\n" |
| 39356 | " * The flags argument specifies the memory\n" |
| 39357 | " * address space and can be set to a\n" |
| 39358 | " * combination of the following literal\n" |
| 39359 | " * values:\n" |
| 39360 | " * CLK_LOCAL_MEM_FENCE\n" |
| 39361 | " * CLK_GLOBAL_MEM_FENCE.\n" |
| 39362 | " */\n" |
| 39363 | "void __ovld write_mem_fence(cl_mem_fence_flags flags);\n" |
| 39364 | "\n" |
| 39365 | "// OpenCL v2.0 s6.13.9 - Address Space Qualifier Functions\n" |
| 39366 | "\n" |
| 39367 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 39368 | "cl_mem_fence_flags __ovld get_fence(const void *ptr);\n" |
| 39369 | "cl_mem_fence_flags __ovld get_fence(void *ptr);\n" |
| 39370 | "\n" |
| 39371 | "/**\n" |
| 39372 | " * Builtin functions to_global, to_local, and to_private need to be declared as Clang builtin functions\n" |
| 39373 | " * and checked in Sema since they should be declared as\n" |
| 39374 | " * addr gentype* to_addr (gentype*);\n" |
| 39375 | " * where gentype is builtin type or user defined type.\n" |
| 39376 | " */\n" |
| 39377 | "\n" |
| 39378 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 39379 | "\n" |
| 39380 | "// OpenCL v1.1 s6.11.10, v1.2 s6.12.10, v2.0 s6.13.10 - Async Copies from Global to Local Memory, Local to Global Memory, and Prefetch\n" |
| 39381 | "\n" |
| 39382 | "/**\n" |
| 39383 | " * event_t async_work_group_copy (\n" |
| 39384 | " * __global gentype *dst,\n" |
| 39385 | " * const __local gentype *src,\n" |
| 39386 | " * size_t num_elements,\n" |
| 39387 | " * event_t event)\n" |
| 39388 | " * Perform an async copy of num_elements\n" |
| 39389 | " * gentype elements from src to dst. The async\n" |
| 39390 | " * copy is performed by all work-items in a workgroup\n" |
| 39391 | " * and this built-in function must therefore\n" |
| 39392 | " * be encountered by all work-items in a workgroup\n" |
| 39393 | " * executing the kernel with the same\n" |
| 39394 | " * argument values; otherwise the results are\n" |
| 39395 | " * undefined.\n" |
| 39396 | " * Returns an event object that can be used by\n" |
| 39397 | " * wait_group_events to wait for the async copy\n" |
| 39398 | " * to finish. The event argument can also be used\n" |
| 39399 | " * to associate the async_work_group_copy with\n" |
| 39400 | " * a previous async copy allowing an event to be\n" |
| 39401 | " * shared by multiple async copies; otherwise event\n" |
| 39402 | " * should be zero.\n" |
| 39403 | " * If event argument is non-zero, the event object\n" |
| 39404 | " * supplied in event argument will be returned.\n" |
| 39405 | " * This function does not perform any implicit\n" |
| 39406 | " * synchronization of source data such as using a\n" |
| 39407 | " * barrier before performing the copy.\n" |
| 39408 | " */\n" |
| 39409 | "event_t __ovld async_work_group_copy(__local char *dst, const __global char *src, size_t num_elements, event_t event);\n" |
| 39410 | "event_t __ovld async_work_group_copy(__local uchar *dst, const __global uchar *src, size_t num_elements, event_t event);\n" |
| 39411 | "event_t __ovld async_work_group_copy(__local short *dst, const __global short *src, size_t num_elements, event_t event);\n" |
| 39412 | "event_t __ovld async_work_group_copy(__local ushort *dst, const __global ushort *src, size_t num_elements, event_t event);\n" |
| 39413 | "event_t __ovld async_work_group_copy(__local int *dst, const __global int *src, size_t num_elements, event_t event);\n" |
| 39414 | "event_t __ovld async_work_group_copy(__local uint *dst, const __global uint *src, size_t num_elements, event_t event);\n" |
| 39415 | "event_t __ovld async_work_group_copy(__local long *dst, const __global long *src, size_t num_elements, event_t event);\n" |
| 39416 | "event_t __ovld async_work_group_copy(__local ulong *dst, const __global ulong *src, size_t num_elements, event_t event);\n" |
| 39417 | "event_t __ovld async_work_group_copy(__local float *dst, const __global float *src, size_t num_elements, event_t event);\n" |
| 39418 | "event_t __ovld async_work_group_copy(__local char2 *dst, const __global char2 *src, size_t num_elements, event_t event);\n" |
| 39419 | "event_t __ovld async_work_group_copy(__local uchar2 *dst, const __global uchar2 *src, size_t num_elements, event_t event);\n" |
| 39420 | "event_t __ovld async_work_group_copy(__local short2 *dst, const __global short2 *src, size_t num_elements, event_t event);\n" |
| 39421 | "event_t __ovld async_work_group_copy(__local ushort2 *dst, const __global ushort2 *src, size_t num_elements, event_t event);\n" |
| 39422 | "event_t __ovld async_work_group_copy(__local int2 *dst, const __global int2 *src, size_t num_elements, event_t event);\n" |
| 39423 | "event_t __ovld async_work_group_copy(__local uint2 *dst, const __global uint2 *src, size_t num_elements, event_t event);\n" |
| 39424 | "event_t __ovld async_work_group_copy(__local long2 *dst, const __global long2 *src, size_t num_elements, event_t event);\n" |
| 39425 | "event_t __ovld async_work_group_copy(__local ulong2 *dst, const __global ulong2 *src, size_t num_elements, event_t event);\n" |
| 39426 | "event_t __ovld async_work_group_copy(__local float2 *dst, const __global float2 *src, size_t num_elements, event_t event);\n" |
| 39427 | "event_t __ovld async_work_group_copy(__local char3 *dst, const __global char3 *src, size_t num_elements, event_t event);\n" |
| 39428 | "event_t __ovld async_work_group_copy(__local uchar3 *dst, const __global uchar3 *src, size_t num_elements, event_t event);\n" |
| 39429 | "event_t __ovld async_work_group_copy(__local short3 *dst, const __global short3 *src, size_t num_elements, event_t event);\n" |
| 39430 | "event_t __ovld async_work_group_copy(__local ushort3 *dst, const __global ushort3 *src, size_t num_elements, event_t event);\n" |
| 39431 | "event_t __ovld async_work_group_copy(__local int3 *dst, const __global int3 *src, size_t num_elements, event_t event);\n" |
| 39432 | "event_t __ovld async_work_group_copy(__local uint3 *dst, const __global uint3 *src, size_t num_elements, event_t event);\n" |
| 39433 | "event_t __ovld async_work_group_copy(__local long3 *dst, const __global long3 *src, size_t num_elements, event_t event);\n" |
| 39434 | "event_t __ovld async_work_group_copy(__local ulong3 *dst, const __global ulong3 *src, size_t num_elements, event_t event);\n" |
| 39435 | "event_t __ovld async_work_group_copy(__local float3 *dst, const __global float3 *src, size_t num_elements, event_t event);\n" |
| 39436 | "event_t __ovld async_work_group_copy(__local char4 *dst, const __global char4 *src, size_t num_elements, event_t event);\n" |
| 39437 | "event_t __ovld async_work_group_copy(__local uchar4 *dst, const __global uchar4 *src, size_t num_elements, event_t event);\n" |
| 39438 | "event_t __ovld async_work_group_copy(__local short4 *dst, const __global short4 *src, size_t num_elements, event_t event);\n" |
| 39439 | "event_t __ovld async_work_group_copy(__local ushort4 *dst, const __global ushort4 *src, size_t num_elements, event_t event);\n" |
| 39440 | "event_t __ovld async_work_group_copy(__local int4 *dst, const __global int4 *src, size_t num_elements, event_t event);\n" |
| 39441 | "event_t __ovld async_work_group_copy(__local uint4 *dst, const __global uint4 *src, size_t num_elements, event_t event);\n" |
| 39442 | "event_t __ovld async_work_group_copy(__local long4 *dst, const __global long4 *src, size_t num_elements, event_t event);\n" |
| 39443 | "event_t __ovld async_work_group_copy(__local ulong4 *dst, const __global ulong4 *src, size_t num_elements, event_t event);\n" |
| 39444 | "event_t __ovld async_work_group_copy(__local float4 *dst, const __global float4 *src, size_t num_elements, event_t event);\n" |
| 39445 | "event_t __ovld async_work_group_copy(__local char8 *dst, const __global char8 *src, size_t num_elements, event_t event);\n" |
| 39446 | "event_t __ovld async_work_group_copy(__local uchar8 *dst, const __global uchar8 *src, size_t num_elements, event_t event);\n" |
| 39447 | "event_t __ovld async_work_group_copy(__local short8 *dst, const __global short8 *src, size_t num_elements, event_t event);\n" |
| 39448 | "event_t __ovld async_work_group_copy(__local ushort8 *dst, const __global ushort8 *src, size_t num_elements, event_t event);\n" |
| 39449 | "event_t __ovld async_work_group_copy(__local int8 *dst, const __global int8 *src, size_t num_elements, event_t event);\n" |
| 39450 | "event_t __ovld async_work_group_copy(__local uint8 *dst, const __global uint8 *src, size_t num_elements, event_t event);\n" |
| 39451 | "event_t __ovld async_work_group_copy(__local long8 *dst, const __global long8 *src, size_t num_elements, event_t event);\n" |
| 39452 | "event_t __ovld async_work_group_copy(__local ulong8 *dst, const __global ulong8 *src, size_t num_elements, event_t event);\n" |
| 39453 | "event_t __ovld async_work_group_copy(__local float8 *dst, const __global float8 *src, size_t num_elements, event_t event);\n" |
| 39454 | "event_t __ovld async_work_group_copy(__local char16 *dst, const __global char16 *src, size_t num_elements, event_t event);\n" |
| 39455 | "event_t __ovld async_work_group_copy(__local uchar16 *dst, const __global uchar16 *src, size_t num_elements, event_t event);\n" |
| 39456 | "event_t __ovld async_work_group_copy(__local short16 *dst, const __global short16 *src, size_t num_elements, event_t event);\n" |
| 39457 | "event_t __ovld async_work_group_copy(__local ushort16 *dst, const __global ushort16 *src, size_t num_elements, event_t event);\n" |
| 39458 | "event_t __ovld async_work_group_copy(__local int16 *dst, const __global int16 *src, size_t num_elements, event_t event);\n" |
| 39459 | "event_t __ovld async_work_group_copy(__local uint16 *dst, const __global uint16 *src, size_t num_elements, event_t event);\n" |
| 39460 | "event_t __ovld async_work_group_copy(__local long16 *dst, const __global long16 *src, size_t num_elements, event_t event);\n" |
| 39461 | "event_t __ovld async_work_group_copy(__local ulong16 *dst, const __global ulong16 *src, size_t num_elements, event_t event);\n" |
| 39462 | "event_t __ovld async_work_group_copy(__local float16 *dst, const __global float16 *src, size_t num_elements, event_t event);\n" |
| 39463 | "event_t __ovld async_work_group_copy(__global char *dst, const __local char *src, size_t num_elements, event_t event);\n" |
| 39464 | "event_t __ovld async_work_group_copy(__global uchar *dst, const __local uchar *src, size_t num_elements, event_t event);\n" |
| 39465 | "event_t __ovld async_work_group_copy(__global short *dst, const __local short *src, size_t num_elements, event_t event);\n" |
| 39466 | "event_t __ovld async_work_group_copy(__global ushort *dst, const __local ushort *src, size_t num_elements, event_t event);\n" |
| 39467 | "event_t __ovld async_work_group_copy(__global int *dst, const __local int *src, size_t num_elements, event_t event);\n" |
| 39468 | "event_t __ovld async_work_group_copy(__global uint *dst, const __local uint *src, size_t num_elements, event_t event);\n" |
| 39469 | "event_t __ovld async_work_group_copy(__global long *dst, const __local long *src, size_t num_elements, event_t event);\n" |
| 39470 | "event_t __ovld async_work_group_copy(__global ulong *dst, const __local ulong *src, size_t num_elements, event_t event);\n" |
| 39471 | "event_t __ovld async_work_group_copy(__global float *dst, const __local float *src, size_t num_elements, event_t event);\n" |
| 39472 | "event_t __ovld async_work_group_copy(__global char2 *dst, const __local char2 *src, size_t num_elements, event_t event);\n" |
| 39473 | "event_t __ovld async_work_group_copy(__global uchar2 *dst, const __local uchar2 *src, size_t num_elements, event_t event);\n" |
| 39474 | "event_t __ovld async_work_group_copy(__global short2 *dst, const __local short2 *src, size_t num_elements, event_t event);\n" |
| 39475 | "event_t __ovld async_work_group_copy(__global ushort2 *dst, const __local ushort2 *src, size_t num_elements, event_t event);\n" |
| 39476 | "event_t __ovld async_work_group_copy(__global int2 *dst, const __local int2 *src, size_t num_elements, event_t event);\n" |
| 39477 | "event_t __ovld async_work_group_copy(__global uint2 *dst, const __local uint2 *src, size_t num_elements, event_t event);\n" |
| 39478 | "event_t __ovld async_work_group_copy(__global long2 *dst, const __local long2 *src, size_t num_elements, event_t event);\n" |
| 39479 | "event_t __ovld async_work_group_copy(__global ulong2 *dst, const __local ulong2 *src, size_t num_elements, event_t event);\n" |
| 39480 | "event_t __ovld async_work_group_copy(__global float2 *dst, const __local float2 *src, size_t num_elements, event_t event);\n" |
| 39481 | "event_t __ovld async_work_group_copy(__global char3 *dst, const __local char3 *src, size_t num_elements, event_t event);\n" |
| 39482 | "event_t __ovld async_work_group_copy(__global uchar3 *dst, const __local uchar3 *src, size_t num_elements, event_t event);\n" |
| 39483 | "event_t __ovld async_work_group_copy(__global short3 *dst, const __local short3 *src, size_t num_elements, event_t event);\n" |
| 39484 | "event_t __ovld async_work_group_copy(__global ushort3 *dst, const __local ushort3 *src, size_t num_elements, event_t event);\n" |
| 39485 | "event_t __ovld async_work_group_copy(__global int3 *dst, const __local int3 *src, size_t num_elements, event_t event);\n" |
| 39486 | "event_t __ovld async_work_group_copy(__global uint3 *dst, const __local uint3 *src, size_t num_elements, event_t event);\n" |
| 39487 | "event_t __ovld async_work_group_copy(__global long3 *dst, const __local long3 *src, size_t num_elements, event_t event);\n" |
| 39488 | "event_t __ovld async_work_group_copy(__global ulong3 *dst, const __local ulong3 *src, size_t num_elements, event_t event);\n" |
| 39489 | "event_t __ovld async_work_group_copy(__global float3 *dst, const __local float3 *src, size_t num_elements, event_t event);\n" |
| 39490 | "event_t __ovld async_work_group_copy(__global char4 *dst, const __local char4 *src, size_t num_elements, event_t event);\n" |
| 39491 | "event_t __ovld async_work_group_copy(__global uchar4 *dst, const __local uchar4 *src, size_t num_elements, event_t event);\n" |
| 39492 | "event_t __ovld async_work_group_copy(__global short4 *dst, const __local short4 *src, size_t num_elements, event_t event);\n" |
| 39493 | "event_t __ovld async_work_group_copy(__global ushort4 *dst, const __local ushort4 *src, size_t num_elements, event_t event);\n" |
| 39494 | "event_t __ovld async_work_group_copy(__global int4 *dst, const __local int4 *src, size_t num_elements, event_t event);\n" |
| 39495 | "event_t __ovld async_work_group_copy(__global uint4 *dst, const __local uint4 *src, size_t num_elements, event_t event);\n" |
| 39496 | "event_t __ovld async_work_group_copy(__global long4 *dst, const __local long4 *src, size_t num_elements, event_t event);\n" |
| 39497 | "event_t __ovld async_work_group_copy(__global ulong4 *dst, const __local ulong4 *src, size_t num_elements, event_t event);\n" |
| 39498 | "event_t __ovld async_work_group_copy(__global float4 *dst, const __local float4 *src, size_t num_elements, event_t event);\n" |
| 39499 | "event_t __ovld async_work_group_copy(__global char8 *dst, const __local char8 *src, size_t num_elements, event_t event);\n" |
| 39500 | "event_t __ovld async_work_group_copy(__global uchar8 *dst, const __local uchar8 *src, size_t num_elements, event_t event);\n" |
| 39501 | "event_t __ovld async_work_group_copy(__global short8 *dst, const __local short8 *src, size_t num_elements, event_t event);\n" |
| 39502 | "event_t __ovld async_work_group_copy(__global ushort8 *dst, const __local ushort8 *src, size_t num_elements, event_t event);\n" |
| 39503 | "event_t __ovld async_work_group_copy(__global int8 *dst, const __local int8 *src, size_t num_elements, event_t event);\n" |
| 39504 | "event_t __ovld async_work_group_copy(__global uint8 *dst, const __local uint8 *src, size_t num_elements, event_t event);\n" |
| 39505 | "event_t __ovld async_work_group_copy(__global long8 *dst, const __local long8 *src, size_t num_elements, event_t event);\n" |
| 39506 | "event_t __ovld async_work_group_copy(__global ulong8 *dst, const __local ulong8 *src, size_t num_elements, event_t event);\n" |
| 39507 | "event_t __ovld async_work_group_copy(__global float8 *dst, const __local float8 *src, size_t num_elements, event_t event);\n" |
| 39508 | "event_t __ovld async_work_group_copy(__global char16 *dst, const __local char16 *src, size_t num_elements, event_t event);\n" |
| 39509 | "event_t __ovld async_work_group_copy(__global uchar16 *dst, const __local uchar16 *src, size_t num_elements, event_t event);\n" |
| 39510 | "event_t __ovld async_work_group_copy(__global short16 *dst, const __local short16 *src, size_t num_elements, event_t event);\n" |
| 39511 | "event_t __ovld async_work_group_copy(__global ushort16 *dst, const __local ushort16 *src, size_t num_elements, event_t event);\n" |
| 39512 | "event_t __ovld async_work_group_copy(__global int16 *dst, const __local int16 *src, size_t num_elements, event_t event);\n" |
| 39513 | "event_t __ovld async_work_group_copy(__global uint16 *dst, const __local uint16 *src, size_t num_elements, event_t event);\n" |
| 39514 | "event_t __ovld async_work_group_copy(__global long16 *dst, const __local long16 *src, size_t num_elements, event_t event);\n" |
| 39515 | "event_t __ovld async_work_group_copy(__global ulong16 *dst, const __local ulong16 *src, size_t num_elements, event_t event);\n" |
| 39516 | "event_t __ovld async_work_group_copy(__global float16 *dst, const __local float16 *src, size_t num_elements, event_t event);\n" |
| 39517 | "#ifdef cl_khr_fp64\n" |
| 39518 | "event_t __ovld async_work_group_copy(__local double *dst, const __global double *src, size_t num_elements, event_t event);\n" |
| 39519 | "event_t __ovld async_work_group_copy(__local double2 *dst, const __global double2 *src, size_t num_elements, event_t event);\n" |
| 39520 | "event_t __ovld async_work_group_copy(__local double3 *dst, const __global double3 *src, size_t num_elements, event_t event);\n" |
| 39521 | "event_t __ovld async_work_group_copy(__local double4 *dst, const __global double4 *src, size_t num_elements, event_t event);\n" |
| 39522 | "event_t __ovld async_work_group_copy(__local double8 *dst, const __global double8 *src, size_t num_elements, event_t event);\n" |
| 39523 | "event_t __ovld async_work_group_copy(__local double16 *dst, const __global double16 *src, size_t num_elements, event_t event);\n" |
| 39524 | "event_t __ovld async_work_group_copy(__global double *dst, const __local double *src, size_t num_elements, event_t event);\n" |
| 39525 | "event_t __ovld async_work_group_copy(__global double2 *dst, const __local double2 *src, size_t num_elements, event_t event);\n" |
| 39526 | "event_t __ovld async_work_group_copy(__global double3 *dst, const __local double3 *src, size_t num_elements, event_t event);\n" |
| 39527 | "event_t __ovld async_work_group_copy(__global double4 *dst, const __local double4 *src, size_t num_elements, event_t event);\n" |
| 39528 | "event_t __ovld async_work_group_copy(__global double8 *dst, const __local double8 *src, size_t num_elements, event_t event);\n" |
| 39529 | "event_t __ovld async_work_group_copy(__global double16 *dst, const __local double16 *src, size_t num_elements, event_t event);\n" |
| 39530 | "#endif //cl_khr_fp64\n" |
| 39531 | "#ifdef cl_khr_fp16\n" |
| 39532 | "event_t __ovld async_work_group_copy(__local half *dst, const __global half *src, size_t num_elements, event_t event);\n" |
| 39533 | "event_t __ovld async_work_group_copy(__local half2 *dst, const __global half2 *src, size_t num_elements, event_t event);\n" |
| 39534 | "event_t __ovld async_work_group_copy(__local half3 *dst, const __global half3 *src, size_t num_elements, event_t event);\n" |
| 39535 | "event_t __ovld async_work_group_copy(__local half4 *dst, const __global half4 *src, size_t num_elements, event_t event);\n" |
| 39536 | "event_t __ovld async_work_group_copy(__local half8 *dst, const __global half8 *src, size_t num_elements, event_t event);\n" |
| 39537 | "event_t __ovld async_work_group_copy(__local half16 *dst, const __global half16 *src, size_t num_elements, event_t event);\n" |
| 39538 | "event_t __ovld async_work_group_copy(__global half *dst, const __local half *src, size_t num_elements, event_t event);\n" |
| 39539 | "event_t __ovld async_work_group_copy(__global half2 *dst, const __local half2 *src, size_t num_elements, event_t event);\n" |
| 39540 | "event_t __ovld async_work_group_copy(__global half3 *dst, const __local half3 *src, size_t num_elements, event_t event);\n" |
| 39541 | "event_t __ovld async_work_group_copy(__global half4 *dst, const __local half4 *src, size_t num_elements, event_t event);\n" |
| 39542 | "event_t __ovld async_work_group_copy(__global half8 *dst, const __local half8 *src, size_t num_elements, event_t event);\n" |
| 39543 | "event_t __ovld async_work_group_copy(__global half16 *dst, const __local half16 *src, size_t num_elements, event_t event);\n" |
| 39544 | "#endif //cl_khr_fp16\n" |
| 39545 | "\n" |
| 39546 | "/**\n" |
| 39547 | " * Perform an async gather of num_elements\n" |
| 39548 | " * gentype elements from src to dst. The\n" |
| 39549 | " * src_stride is the stride in elements for each\n" |
| 39550 | " * gentype element read from src. The dst_stride\n" |
| 39551 | " * is the stride in elements for each gentype\n" |
| 39552 | " * element written to dst. The async gather is\n" |
| 39553 | " * performed by all work-items in a work-group.\n" |
| 39554 | " * This built-in function must therefore be\n" |
| 39555 | " * encountered by all work-items in a work-group\n" |
| 39556 | " * executing the kernel with the same argument\n" |
| 39557 | " * values; otherwise the results are undefined.\n" |
| 39558 | " * Returns an event object that can be used by\n" |
| 39559 | " * wait_group_events to wait for the async copy\n" |
| 39560 | " * to finish. The event argument can also be used\n" |
| 39561 | " * to associate the\n" |
| 39562 | " * async_work_group_strided_copy with a\n" |
| 39563 | " * previous async copy allowing an event to be\n" |
| 39564 | " * shared by multiple async copies; otherwise event\n" |
| 39565 | " * should be zero.\n" |
| 39566 | " * If event argument is non-zero, the event object\n" |
| 39567 | " * supplied in event argument will be returned.\n" |
| 39568 | " * This function does not perform any implicit\n" |
| 39569 | " * synchronization of source data such as using a\n" |
| 39570 | " * barrier before performing the copy.\n" |
| 39571 | " */\n" |
| 39572 | "event_t __ovld async_work_group_strided_copy(__local char *dst, const __global char *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39573 | "event_t __ovld async_work_group_strided_copy(__local uchar *dst, const __global uchar *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39574 | "event_t __ovld async_work_group_strided_copy(__local short *dst, const __global short *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39575 | "event_t __ovld async_work_group_strided_copy(__local ushort *dst, const __global ushort *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39576 | "event_t __ovld async_work_group_strided_copy(__local int *dst, const __global int *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39577 | "event_t __ovld async_work_group_strided_copy(__local uint *dst, const __global uint *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39578 | "event_t __ovld async_work_group_strided_copy(__local long *dst, const __global long *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39579 | "event_t __ovld async_work_group_strided_copy(__local ulong *dst, const __global ulong *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39580 | "event_t __ovld async_work_group_strided_copy(__local float *dst, const __global float *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39581 | "event_t __ovld async_work_group_strided_copy(__local char2 *dst, const __global char2 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39582 | "event_t __ovld async_work_group_strided_copy(__local uchar2 *dst, const __global uchar2 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39583 | "event_t __ovld async_work_group_strided_copy(__local short2 *dst, const __global short2 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39584 | "event_t __ovld async_work_group_strided_copy(__local ushort2 *dst, const __global ushort2 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39585 | "event_t __ovld async_work_group_strided_copy(__local int2 *dst, const __global int2 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39586 | "event_t __ovld async_work_group_strided_copy(__local uint2 *dst, const __global uint2 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39587 | "event_t __ovld async_work_group_strided_copy(__local long2 *dst, const __global long2 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39588 | "event_t __ovld async_work_group_strided_copy(__local ulong2 *dst, const __global ulong2 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39589 | "event_t __ovld async_work_group_strided_copy(__local float2 *dst, const __global float2 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39590 | "event_t __ovld async_work_group_strided_copy(__local char3 *dst, const __global char3 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39591 | "event_t __ovld async_work_group_strided_copy(__local uchar3 *dst, const __global uchar3 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39592 | "event_t __ovld async_work_group_strided_copy(__local short3 *dst, const __global short3 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39593 | "event_t __ovld async_work_group_strided_copy(__local ushort3 *dst, const __global ushort3 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39594 | "event_t __ovld async_work_group_strided_copy(__local int3 *dst, const __global int3 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39595 | "event_t __ovld async_work_group_strided_copy(__local uint3 *dst, const __global uint3 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39596 | "event_t __ovld async_work_group_strided_copy(__local long3 *dst, const __global long3 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39597 | "event_t __ovld async_work_group_strided_copy(__local ulong3 *dst, const __global ulong3 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39598 | "event_t __ovld async_work_group_strided_copy(__local float3 *dst, const __global float3 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39599 | "event_t __ovld async_work_group_strided_copy(__local char4 *dst, const __global char4 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39600 | "event_t __ovld async_work_group_strided_copy(__local uchar4 *dst, const __global uchar4 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39601 | "event_t __ovld async_work_group_strided_copy(__local short4 *dst, const __global short4 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39602 | "event_t __ovld async_work_group_strided_copy(__local ushort4 *dst, const __global ushort4 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39603 | "event_t __ovld async_work_group_strided_copy(__local int4 *dst, const __global int4 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39604 | "event_t __ovld async_work_group_strided_copy(__local uint4 *dst, const __global uint4 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39605 | "event_t __ovld async_work_group_strided_copy(__local long4 *dst, const __global long4 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39606 | "event_t __ovld async_work_group_strided_copy(__local ulong4 *dst, const __global ulong4 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39607 | "event_t __ovld async_work_group_strided_copy(__local float4 *dst, const __global float4 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39608 | "event_t __ovld async_work_group_strided_copy(__local char8 *dst, const __global char8 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39609 | "event_t __ovld async_work_group_strided_copy(__local uchar8 *dst, const __global uchar8 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39610 | "event_t __ovld async_work_group_strided_copy(__local short8 *dst, const __global short8 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39611 | "event_t __ovld async_work_group_strided_copy(__local ushort8 *dst, const __global ushort8 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39612 | "event_t __ovld async_work_group_strided_copy(__local int8 *dst, const __global int8 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39613 | "event_t __ovld async_work_group_strided_copy(__local uint8 *dst, const __global uint8 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39614 | "event_t __ovld async_work_group_strided_copy(__local long8 *dst, const __global long8 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39615 | "event_t __ovld async_work_group_strided_copy(__local ulong8 *dst, const __global ulong8 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39616 | "event_t __ovld async_work_group_strided_copy(__local float8 *dst, const __global float8 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39617 | "event_t __ovld async_work_group_strided_copy(__local char16 *dst, const __global char16 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39618 | "event_t __ovld async_work_group_strided_copy(__local uchar16 *dst, const __global uchar16 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39619 | "event_t __ovld async_work_group_strided_copy(__local short16 *dst, const __global short16 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39620 | "event_t __ovld async_work_group_strided_copy(__local ushort16 *dst, const __global ushort16 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39621 | "event_t __ovld async_work_group_strided_copy(__local int16 *dst, const __global int16 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39622 | "event_t __ovld async_work_group_strided_copy(__local uint16 *dst, const __global uint16 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39623 | "event_t __ovld async_work_group_strided_copy(__local long16 *dst, const __global long16 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39624 | "event_t __ovld async_work_group_strided_copy(__local ulong16 *dst, const __global ulong16 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39625 | "event_t __ovld async_work_group_strided_copy(__local float16 *dst, const __global float16 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39626 | "event_t __ovld async_work_group_strided_copy(__global char *dst, const __local char *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39627 | "event_t __ovld async_work_group_strided_copy(__global uchar *dst, const __local uchar *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39628 | "event_t __ovld async_work_group_strided_copy(__global short *dst, const __local short *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39629 | "event_t __ovld async_work_group_strided_copy(__global ushort *dst, const __local ushort *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39630 | "event_t __ovld async_work_group_strided_copy(__global int *dst, const __local int *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39631 | "event_t __ovld async_work_group_strided_copy(__global uint *dst, const __local uint *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39632 | "event_t __ovld async_work_group_strided_copy(__global long *dst, const __local long *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39633 | "event_t __ovld async_work_group_strided_copy(__global ulong *dst, const __local ulong *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39634 | "event_t __ovld async_work_group_strided_copy(__global float *dst, const __local float *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39635 | "event_t __ovld async_work_group_strided_copy(__global char2 *dst, const __local char2 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39636 | "event_t __ovld async_work_group_strided_copy(__global uchar2 *dst, const __local uchar2 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39637 | "event_t __ovld async_work_group_strided_copy(__global short2 *dst, const __local short2 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39638 | "event_t __ovld async_work_group_strided_copy(__global ushort2 *dst, const __local ushort2 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39639 | "event_t __ovld async_work_group_strided_copy(__global int2 *dst, const __local int2 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39640 | "event_t __ovld async_work_group_strided_copy(__global uint2 *dst, const __local uint2 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39641 | "event_t __ovld async_work_group_strided_copy(__global long2 *dst, const __local long2 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39642 | "event_t __ovld async_work_group_strided_copy(__global ulong2 *dst, const __local ulong2 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39643 | "event_t __ovld async_work_group_strided_copy(__global float2 *dst, const __local float2 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39644 | "event_t __ovld async_work_group_strided_copy(__global char3 *dst, const __local char3 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39645 | "event_t __ovld async_work_group_strided_copy(__global uchar3 *dst, const __local uchar3 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39646 | "event_t __ovld async_work_group_strided_copy(__global short3 *dst, const __local short3 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39647 | "event_t __ovld async_work_group_strided_copy(__global ushort3 *dst, const __local ushort3 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39648 | "event_t __ovld async_work_group_strided_copy(__global int3 *dst, const __local int3 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39649 | "event_t __ovld async_work_group_strided_copy(__global uint3 *dst, const __local uint3 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39650 | "event_t __ovld async_work_group_strided_copy(__global long3 *dst, const __local long3 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39651 | "event_t __ovld async_work_group_strided_copy(__global ulong3 *dst, const __local ulong3 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39652 | "event_t __ovld async_work_group_strided_copy(__global float3 *dst, const __local float3 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39653 | "event_t __ovld async_work_group_strided_copy(__global char4 *dst, const __local char4 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39654 | "event_t __ovld async_work_group_strided_copy(__global uchar4 *dst, const __local uchar4 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39655 | "event_t __ovld async_work_group_strided_copy(__global short4 *dst, const __local short4 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39656 | "event_t __ovld async_work_group_strided_copy(__global ushort4 *dst, const __local ushort4 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39657 | "event_t __ovld async_work_group_strided_copy(__global int4 *dst, const __local int4 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39658 | "event_t __ovld async_work_group_strided_copy(__global uint4 *dst, const __local uint4 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39659 | "event_t __ovld async_work_group_strided_copy(__global long4 *dst, const __local long4 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39660 | "event_t __ovld async_work_group_strided_copy(__global ulong4 *dst, const __local ulong4 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39661 | "event_t __ovld async_work_group_strided_copy(__global float4 *dst, const __local float4 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39662 | "event_t __ovld async_work_group_strided_copy(__global char8 *dst, const __local char8 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39663 | "event_t __ovld async_work_group_strided_copy(__global uchar8 *dst, const __local uchar8 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39664 | "event_t __ovld async_work_group_strided_copy(__global short8 *dst, const __local short8 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39665 | "event_t __ovld async_work_group_strided_copy(__global ushort8 *dst, const __local ushort8 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39666 | "event_t __ovld async_work_group_strided_copy(__global int8 *dst, const __local int8 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39667 | "event_t __ovld async_work_group_strided_copy(__global uint8 *dst, const __local uint8 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39668 | "event_t __ovld async_work_group_strided_copy(__global long8 *dst, const __local long8 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39669 | "event_t __ovld async_work_group_strided_copy(__global ulong8 *dst, const __local ulong8 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39670 | "event_t __ovld async_work_group_strided_copy(__global float8 *dst, const __local float8 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39671 | "event_t __ovld async_work_group_strided_copy(__global char16 *dst, const __local char16 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39672 | "event_t __ovld async_work_group_strided_copy(__global uchar16 *dst, const __local uchar16 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39673 | "event_t __ovld async_work_group_strided_copy(__global short16 *dst, const __local short16 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39674 | "event_t __ovld async_work_group_strided_copy(__global ushort16 *dst, const __local ushort16 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39675 | "event_t __ovld async_work_group_strided_copy(__global int16 *dst, const __local int16 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39676 | "event_t __ovld async_work_group_strided_copy(__global uint16 *dst, const __local uint16 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39677 | "event_t __ovld async_work_group_strided_copy(__global long16 *dst, const __local long16 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39678 | "event_t __ovld async_work_group_strided_copy(__global ulong16 *dst, const __local ulong16 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39679 | "event_t __ovld async_work_group_strided_copy(__global float16 *dst, const __local float16 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39680 | "#ifdef cl_khr_fp64\n" |
| 39681 | "event_t __ovld async_work_group_strided_copy(__local double *dst, const __global double *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39682 | "event_t __ovld async_work_group_strided_copy(__local double2 *dst, const __global double2 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39683 | "event_t __ovld async_work_group_strided_copy(__local double3 *dst, const __global double3 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39684 | "event_t __ovld async_work_group_strided_copy(__local double4 *dst, const __global double4 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39685 | "event_t __ovld async_work_group_strided_copy(__local double8 *dst, const __global double8 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39686 | "event_t __ovld async_work_group_strided_copy(__local double16 *dst, const __global double16 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39687 | "event_t __ovld async_work_group_strided_copy(__global double *dst, const __local double *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39688 | "event_t __ovld async_work_group_strided_copy(__global double2 *dst, const __local double2 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39689 | "event_t __ovld async_work_group_strided_copy(__global double3 *dst, const __local double3 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39690 | "event_t __ovld async_work_group_strided_copy(__global double4 *dst, const __local double4 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39691 | "event_t __ovld async_work_group_strided_copy(__global double8 *dst, const __local double8 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39692 | "event_t __ovld async_work_group_strided_copy(__global double16 *dst, const __local double16 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39693 | "#endif //cl_khr_fp64\n" |
| 39694 | "#ifdef cl_khr_fp16\n" |
| 39695 | "event_t __ovld async_work_group_strided_copy(__local half *dst, const __global half *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39696 | "event_t __ovld async_work_group_strided_copy(__local half2 *dst, const __global half2 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39697 | "event_t __ovld async_work_group_strided_copy(__local half3 *dst, const __global half3 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39698 | "event_t __ovld async_work_group_strided_copy(__local half4 *dst, const __global half4 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39699 | "event_t __ovld async_work_group_strided_copy(__local half8 *dst, const __global half8 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39700 | "event_t __ovld async_work_group_strided_copy(__local half16 *dst, const __global half16 *src, size_t num_elements, size_t src_stride, event_t event);\n" |
| 39701 | "event_t __ovld async_work_group_strided_copy(__global half *dst, const __local half *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39702 | "event_t __ovld async_work_group_strided_copy(__global half2 *dst, const __local half2 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39703 | "event_t __ovld async_work_group_strided_copy(__global half3 *dst, const __local half3 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39704 | "event_t __ovld async_work_group_strided_copy(__global half4 *dst, const __local half4 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39705 | "event_t __ovld async_work_group_strided_copy(__global half8 *dst, const __local half8 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39706 | "event_t __ovld async_work_group_strided_copy(__global half16 *dst, const __local half16 *src, size_t num_elements, size_t dst_stride, event_t event);\n" |
| 39707 | "#endif //cl_khr_fp16\n" |
| 39708 | "\n" |
| 39709 | "/**\n" |
| 39710 | " * Wait for events that identify the\n" |
| 39711 | " * async_work_group_copy operations to\n" |
| 39712 | " * complete. The event objects specified in\n" |
| 39713 | " * event_list will be released after the wait is\n" |
| 39714 | " * performed.\n" |
| 39715 | " * This function must be encountered by all workitems\n" |
| 39716 | " * in a work-group executing the kernel with\n" |
| 39717 | " * the same num_events and event objects specified\n" |
| 39718 | " * in event_list; otherwise the results are undefined.\n" |
| 39719 | " */\n" |
| 39720 | "void __ovld wait_group_events(int num_events, event_t *event_list);\n" |
| 39721 | "\n" |
| 39722 | "/**\n" |
| 39723 | " * Prefetch num_elements * sizeof(gentype)\n" |
| 39724 | " * bytes into the global cache. The prefetch\n" |
| 39725 | " * instruction is applied to a work-item in a workgroup\n" |
| 39726 | " * and does not affect the functional\n" |
| 39727 | " * behavior of the kernel.\n" |
| 39728 | " */\n" |
| 39729 | "void __ovld prefetch(const __global char *p, size_t num_elements);\n" |
| 39730 | "void __ovld prefetch(const __global uchar *p, size_t num_elements);\n" |
| 39731 | "void __ovld prefetch(const __global short *p, size_t num_elements);\n" |
| 39732 | "void __ovld prefetch(const __global ushort *p, size_t num_elements);\n" |
| 39733 | "void __ovld prefetch(const __global int *p, size_t num_elements);\n" |
| 39734 | "void __ovld prefetch(const __global uint *p, size_t num_elements);\n" |
| 39735 | "void __ovld prefetch(const __global long *p, size_t num_elements);\n" |
| 39736 | "void __ovld prefetch(const __global ulong *p, size_t num_elements);\n" |
| 39737 | "void __ovld prefetch(const __global float *p, size_t num_elements);\n" |
| 39738 | "void __ovld prefetch(const __global char2 *p, size_t num_elements);\n" |
| 39739 | "void __ovld prefetch(const __global uchar2 *p, size_t num_elements);\n" |
| 39740 | "void __ovld prefetch(const __global short2 *p, size_t num_elements);\n" |
| 39741 | "void __ovld prefetch(const __global ushort2 *p, size_t num_elements);\n" |
| 39742 | "void __ovld prefetch(const __global int2 *p, size_t num_elements);\n" |
| 39743 | "void __ovld prefetch(const __global uint2 *p, size_t num_elements);\n" |
| 39744 | "void __ovld prefetch(const __global long2 *p, size_t num_elements);\n" |
| 39745 | "void __ovld prefetch(const __global ulong2 *p, size_t num_elements);\n" |
| 39746 | "void __ovld prefetch(const __global float2 *p, size_t num_elements);\n" |
| 39747 | "void __ovld prefetch(const __global char3 *p, size_t num_elements);\n" |
| 39748 | "void __ovld prefetch(const __global uchar3 *p, size_t num_elements);\n" |
| 39749 | "void __ovld prefetch(const __global short3 *p, size_t num_elements);\n" |
| 39750 | "void __ovld prefetch(const __global ushort3 *p, size_t num_elements);\n" |
| 39751 | "void __ovld prefetch(const __global int3 *p, size_t num_elements);\n" |
| 39752 | "void __ovld prefetch(const __global uint3 *p, size_t num_elements);\n" |
| 39753 | "void __ovld prefetch(const __global long3 *p, size_t num_elements);\n" |
| 39754 | "void __ovld prefetch(const __global ulong3 *p, size_t num_elements);\n" |
| 39755 | "void __ovld prefetch(const __global float3 *p, size_t num_elements);\n" |
| 39756 | "void __ovld prefetch(const __global char4 *p, size_t num_elements);\n" |
| 39757 | "void __ovld prefetch(const __global uchar4 *p, size_t num_elements);\n" |
| 39758 | "void __ovld prefetch(const __global short4 *p, size_t num_elements);\n" |
| 39759 | "void __ovld prefetch(const __global ushort4 *p, size_t num_elements);\n" |
| 39760 | "void __ovld prefetch(const __global int4 *p, size_t num_elements);\n" |
| 39761 | "void __ovld prefetch(const __global uint4 *p, size_t num_elements);\n" |
| 39762 | "void __ovld prefetch(const __global long4 *p, size_t num_elements);\n" |
| 39763 | "void __ovld prefetch(const __global ulong4 *p, size_t num_elements);\n" |
| 39764 | "void __ovld prefetch(const __global float4 *p, size_t num_elements);\n" |
| 39765 | "void __ovld prefetch(const __global char8 *p, size_t num_elements);\n" |
| 39766 | "void __ovld prefetch(const __global uchar8 *p, size_t num_elements);\n" |
| 39767 | "void __ovld prefetch(const __global short8 *p, size_t num_elements);\n" |
| 39768 | "void __ovld prefetch(const __global ushort8 *p, size_t num_elements);\n" |
| 39769 | "void __ovld prefetch(const __global int8 *p, size_t num_elements);\n" |
| 39770 | "void __ovld prefetch(const __global uint8 *p, size_t num_elements);\n" |
| 39771 | "void __ovld prefetch(const __global long8 *p, size_t num_elements);\n" |
| 39772 | "void __ovld prefetch(const __global ulong8 *p, size_t num_elements);\n" |
| 39773 | "void __ovld prefetch(const __global float8 *p, size_t num_elements);\n" |
| 39774 | "void __ovld prefetch(const __global char16 *p, size_t num_elements);\n" |
| 39775 | "void __ovld prefetch(const __global uchar16 *p, size_t num_elements);\n" |
| 39776 | "void __ovld prefetch(const __global short16 *p, size_t num_elements);\n" |
| 39777 | "void __ovld prefetch(const __global ushort16 *p, size_t num_elements);\n" |
| 39778 | "void __ovld prefetch(const __global int16 *p, size_t num_elements);\n" |
| 39779 | "void __ovld prefetch(const __global uint16 *p, size_t num_elements);\n" |
| 39780 | "void __ovld prefetch(const __global long16 *p, size_t num_elements);\n" |
| 39781 | "void __ovld prefetch(const __global ulong16 *p, size_t num_elements);\n" |
| 39782 | "void __ovld prefetch(const __global float16 *p, size_t num_elements);\n" |
| 39783 | "#ifdef cl_khr_fp64\n" |
| 39784 | "void __ovld prefetch(const __global double *p, size_t num_elements);\n" |
| 39785 | "void __ovld prefetch(const __global double2 *p, size_t num_elements);\n" |
| 39786 | "void __ovld prefetch(const __global double3 *p, size_t num_elements);\n" |
| 39787 | "void __ovld prefetch(const __global double4 *p, size_t num_elements);\n" |
| 39788 | "void __ovld prefetch(const __global double8 *p, size_t num_elements);\n" |
| 39789 | "void __ovld prefetch(const __global double16 *p, size_t num_elements);\n" |
| 39790 | "#endif //cl_khr_fp64\n" |
| 39791 | "#ifdef cl_khr_fp16\n" |
| 39792 | "void __ovld prefetch(const __global half *p, size_t num_elements);\n" |
| 39793 | "void __ovld prefetch(const __global half2 *p, size_t num_elements);\n" |
| 39794 | "void __ovld prefetch(const __global half3 *p, size_t num_elements);\n" |
| 39795 | "void __ovld prefetch(const __global half4 *p, size_t num_elements);\n" |
| 39796 | "void __ovld prefetch(const __global half8 *p, size_t num_elements);\n" |
| 39797 | "void __ovld prefetch(const __global half16 *p, size_t num_elements);\n" |
| 39798 | "#endif // cl_khr_fp16\n" |
| 39799 | "\n" |
| 39800 | "// OpenCL v1.1 s6.11.1, v1.2 s6.12.11 - Atomic Functions\n" |
| 39801 | "\n" |
| 39802 | "#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n" |
| 39803 | "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n" |
| 39804 | "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n" |
| 39805 | "#endif\n" |
| 39806 | "/**\n" |
| 39807 | " * Read the 32-bit value (referred to as old)\n" |
| 39808 | " * stored at location pointed by p. Compute\n" |
| 39809 | " * (old + val) and store result at location\n" |
| 39810 | " * pointed by p. The function returns old.\n" |
| 39811 | " */\n" |
| 39812 | "int __ovld atomic_add(volatile __global int *p, int val);\n" |
| 39813 | "unsigned int __ovld atomic_add(volatile __global unsigned int *p, unsigned int val);\n" |
| 39814 | "int __ovld atomic_add(volatile __local int *p, int val);\n" |
| 39815 | "unsigned int __ovld atomic_add(volatile __local unsigned int *p, unsigned int val);\n" |
| 39816 | "\n" |
| 39817 | "#if defined(cl_khr_global_int32_base_atomics)\n" |
| 39818 | "int __ovld atom_add(volatile __global int *p, int val);\n" |
| 39819 | "unsigned int __ovld atom_add(volatile __global unsigned int *p, unsigned int val);\n" |
| 39820 | "#endif\n" |
| 39821 | "#if defined(cl_khr_local_int32_base_atomics)\n" |
| 39822 | "int __ovld atom_add(volatile __local int *p, int val);\n" |
| 39823 | "unsigned int __ovld atom_add(volatile __local unsigned int *p, unsigned int val);\n" |
| 39824 | "#endif\n" |
| 39825 | "\n" |
| 39826 | "#if defined(cl_khr_int64_base_atomics)\n" |
| 39827 | "long __ovld atom_add(volatile __global long *p, long val);\n" |
| 39828 | "unsigned long __ovld atom_add(volatile __global unsigned long *p, unsigned long val);\n" |
| 39829 | "long __ovld atom_add(volatile __local long *p, long val);\n" |
| 39830 | "unsigned long __ovld atom_add(volatile __local unsigned long *p, unsigned long val);\n" |
| 39831 | "#endif\n" |
| 39832 | "\n" |
| 39833 | "/**\n" |
| 39834 | " * Read the 32-bit value (referred to as old) stored at location pointed by p.\n" |
| 39835 | " * Compute (old - val) and store result at location pointed by p. The function\n" |
| 39836 | " * returns old.\n" |
| 39837 | " */\n" |
| 39838 | "int __ovld atomic_sub(volatile __global int *p, int val);\n" |
| 39839 | "unsigned int __ovld atomic_sub(volatile __global unsigned int *p, unsigned int val);\n" |
| 39840 | "int __ovld atomic_sub(volatile __local int *p, int val);\n" |
| 39841 | "unsigned int __ovld atomic_sub(volatile __local unsigned int *p, unsigned int val);\n" |
| 39842 | "\n" |
| 39843 | "#if defined(cl_khr_global_int32_base_atomics)\n" |
| 39844 | "int __ovld atom_sub(volatile __global int *p, int val);\n" |
| 39845 | "unsigned int __ovld atom_sub(volatile __global unsigned int *p, unsigned int val);\n" |
| 39846 | "#endif\n" |
| 39847 | "#if defined(cl_khr_local_int32_base_atomics)\n" |
| 39848 | "int __ovld atom_sub(volatile __local int *p, int val);\n" |
| 39849 | "unsigned int __ovld atom_sub(volatile __local unsigned int *p, unsigned int val);\n" |
| 39850 | "#endif\n" |
| 39851 | "\n" |
| 39852 | "#if defined(cl_khr_int64_base_atomics)\n" |
| 39853 | "long __ovld atom_sub(volatile __global long *p, long val);\n" |
| 39854 | "unsigned long __ovld atom_sub(volatile __global unsigned long *p, unsigned long val);\n" |
| 39855 | "long __ovld atom_sub(volatile __local long *p, long val);\n" |
| 39856 | "unsigned long __ovld atom_sub(volatile __local unsigned long *p, unsigned long val);\n" |
| 39857 | "#endif\n" |
| 39858 | "\n" |
| 39859 | "/**\n" |
| 39860 | " * Swaps the old value stored at location p\n" |
| 39861 | " * with new value given by val. Returns old\n" |
| 39862 | " * value.\n" |
| 39863 | " */\n" |
| 39864 | "int __ovld atomic_xchg(volatile __global int *p, int val);\n" |
| 39865 | "unsigned int __ovld atomic_xchg(volatile __global unsigned int *p, unsigned int val);\n" |
| 39866 | "int __ovld atomic_xchg(volatile __local int *p, int val);\n" |
| 39867 | "unsigned int __ovld atomic_xchg(volatile __local unsigned int *p, unsigned int val);\n" |
| 39868 | "float __ovld atomic_xchg(volatile __global float *p, float val);\n" |
| 39869 | "float __ovld atomic_xchg(volatile __local float *p, float val);\n" |
| 39870 | "\n" |
| 39871 | "#if defined(cl_khr_global_int32_base_atomics)\n" |
| 39872 | "int __ovld atom_xchg(volatile __global int *p, int val);\n" |
| 39873 | "unsigned int __ovld atom_xchg(volatile __global unsigned int *p, unsigned int val);\n" |
| 39874 | "#endif\n" |
| 39875 | "#if defined(cl_khr_local_int32_base_atomics)\n" |
| 39876 | "int __ovld atom_xchg(volatile __local int *p, int val);\n" |
| 39877 | "unsigned int __ovld atom_xchg(volatile __local unsigned int *p, unsigned int val);\n" |
| 39878 | "#endif\n" |
| 39879 | "\n" |
| 39880 | "#if defined(cl_khr_int64_base_atomics)\n" |
| 39881 | "long __ovld atom_xchg(volatile __global long *p, long val);\n" |
| 39882 | "long __ovld atom_xchg(volatile __local long *p, long val);\n" |
| 39883 | "unsigned long __ovld atom_xchg(volatile __global unsigned long *p, unsigned long val);\n" |
| 39884 | "unsigned long __ovld atom_xchg(volatile __local unsigned long *p, unsigned long val);\n" |
| 39885 | "#endif\n" |
| 39886 | "\n" |
| 39887 | "/**\n" |
| 39888 | " * Read the 32-bit value (referred to as old)\n" |
| 39889 | " * stored at location pointed by p. Compute\n" |
| 39890 | " * (old + 1) and store result at location\n" |
| 39891 | " * pointed by p. The function returns old.\n" |
| 39892 | " */\n" |
| 39893 | "int __ovld atomic_inc(volatile __global int *p);\n" |
| 39894 | "unsigned int __ovld atomic_inc(volatile __global unsigned int *p);\n" |
| 39895 | "int __ovld atomic_inc(volatile __local int *p);\n" |
| 39896 | "unsigned int __ovld atomic_inc(volatile __local unsigned int *p);\n" |
| 39897 | "\n" |
| 39898 | "#if defined(cl_khr_global_int32_base_atomics)\n" |
| 39899 | "int __ovld atom_inc(volatile __global int *p);\n" |
| 39900 | "unsigned int __ovld atom_inc(volatile __global unsigned int *p);\n" |
| 39901 | "#endif\n" |
| 39902 | "#if defined(cl_khr_local_int32_base_atomics)\n" |
| 39903 | "int __ovld atom_inc(volatile __local int *p);\n" |
| 39904 | "unsigned int __ovld atom_inc(volatile __local unsigned int *p);\n" |
| 39905 | "#endif\n" |
| 39906 | "\n" |
| 39907 | "#if defined(cl_khr_int64_base_atomics)\n" |
| 39908 | "long __ovld atom_inc(volatile __global long *p);\n" |
| 39909 | "unsigned long __ovld atom_inc(volatile __global unsigned long *p);\n" |
| 39910 | "long __ovld atom_inc(volatile __local long *p);\n" |
| 39911 | "unsigned long __ovld atom_inc(volatile __local unsigned long *p);\n" |
| 39912 | "#endif\n" |
| 39913 | "\n" |
| 39914 | "/**\n" |
| 39915 | " * Read the 32-bit value (referred to as old)\n" |
| 39916 | " * stored at location pointed by p. Compute\n" |
| 39917 | " * (old - 1) and store result at location\n" |
| 39918 | " * pointed by p. The function returns old.\n" |
| 39919 | " */\n" |
| 39920 | "int __ovld atomic_dec(volatile __global int *p);\n" |
| 39921 | "unsigned int __ovld atomic_dec(volatile __global unsigned int *p);\n" |
| 39922 | "int __ovld atomic_dec(volatile __local int *p);\n" |
| 39923 | "unsigned int __ovld atomic_dec(volatile __local unsigned int *p);\n" |
| 39924 | "\n" |
| 39925 | "#if defined(cl_khr_global_int32_base_atomics)\n" |
| 39926 | "int __ovld atom_dec(volatile __global int *p);\n" |
| 39927 | "unsigned int __ovld atom_dec(volatile __global unsigned int *p);\n" |
| 39928 | "#endif\n" |
| 39929 | "#if defined(cl_khr_local_int32_base_atomics)\n" |
| 39930 | "int __ovld atom_dec(volatile __local int *p);\n" |
| 39931 | "unsigned int __ovld atom_dec(volatile __local unsigned int *p);\n" |
| 39932 | "#endif\n" |
| 39933 | "\n" |
| 39934 | "#if defined(cl_khr_int64_base_atomics)\n" |
| 39935 | "long __ovld atom_dec(volatile __global long *p);\n" |
| 39936 | "unsigned long __ovld atom_dec(volatile __global unsigned long *p);\n" |
| 39937 | "long __ovld atom_dec(volatile __local long *p);\n" |
| 39938 | "unsigned long __ovld atom_dec(volatile __local unsigned long *p);\n" |
| 39939 | "#endif\n" |
| 39940 | "\n" |
| 39941 | "/**\n" |
| 39942 | " * Read the 32-bit value (referred to as old)\n" |
| 39943 | " * stored at location pointed by p. Compute\n" |
| 39944 | " * (old == cmp) ? val : old and store result at\n" |
| 39945 | " * location pointed by p. The function\n" |
| 39946 | " * returns old.\n" |
| 39947 | " */\n" |
| 39948 | "int __ovld atomic_cmpxchg(volatile __global int *p, int cmp, int val);\n" |
| 39949 | "unsigned int __ovld atomic_cmpxchg(volatile __global unsigned int *p, unsigned int cmp, unsigned int val);\n" |
| 39950 | "int __ovld atomic_cmpxchg(volatile __local int *p, int cmp, int val);\n" |
| 39951 | "unsigned int __ovld atomic_cmpxchg(volatile __local unsigned int *p, unsigned int cmp, unsigned int val);\n" |
| 39952 | "\n" |
| 39953 | "#if defined(cl_khr_global_int32_base_atomics)\n" |
| 39954 | "int __ovld atom_cmpxchg(volatile __global int *p, int cmp, int val);\n" |
| 39955 | "unsigned int __ovld atom_cmpxchg(volatile __global unsigned int *p, unsigned int cmp, unsigned int val);\n" |
| 39956 | "#endif\n" |
| 39957 | "#if defined(cl_khr_local_int32_base_atomics)\n" |
| 39958 | "int __ovld atom_cmpxchg(volatile __local int *p, int cmp, int val);\n" |
| 39959 | "unsigned int __ovld atom_cmpxchg(volatile __local unsigned int *p, unsigned int cmp, unsigned int val);\n" |
| 39960 | "#endif\n" |
| 39961 | "\n" |
| 39962 | "#if defined(cl_khr_int64_base_atomics)\n" |
| 39963 | "long __ovld atom_cmpxchg(volatile __global long *p, long cmp, long val);\n" |
| 39964 | "unsigned long __ovld atom_cmpxchg(volatile __global unsigned long *p, unsigned long cmp, unsigned long val);\n" |
| 39965 | "long __ovld atom_cmpxchg(volatile __local long *p, long cmp, long val);\n" |
| 39966 | "unsigned long __ovld atom_cmpxchg(volatile __local unsigned long *p, unsigned long cmp, unsigned long val);\n" |
| 39967 | "#endif\n" |
| 39968 | "\n" |
| 39969 | "/**\n" |
| 39970 | " * Read the 32-bit value (referred to as old)\n" |
| 39971 | " * stored at location pointed by p. Compute\n" |
| 39972 | " * min(old, val) and store minimum value at\n" |
| 39973 | " * location pointed by p. The function\n" |
| 39974 | " * returns old.\n" |
| 39975 | " */\n" |
| 39976 | "int __ovld atomic_min(volatile __global int *p, int val);\n" |
| 39977 | "unsigned int __ovld atomic_min(volatile __global unsigned int *p, unsigned int val);\n" |
| 39978 | "int __ovld atomic_min(volatile __local int *p, int val);\n" |
| 39979 | "unsigned int __ovld atomic_min(volatile __local unsigned int *p, unsigned int val);\n" |
| 39980 | "\n" |
| 39981 | "#if defined(cl_khr_global_int32_extended_atomics)\n" |
| 39982 | "int __ovld atom_min(volatile __global int *p, int val);\n" |
| 39983 | "unsigned int __ovld atom_min(volatile __global unsigned int *p, unsigned int val);\n" |
| 39984 | "#endif\n" |
| 39985 | "#if defined(cl_khr_local_int32_extended_atomics)\n" |
| 39986 | "int __ovld atom_min(volatile __local int *p, int val);\n" |
| 39987 | "unsigned int __ovld atom_min(volatile __local unsigned int *p, unsigned int val);\n" |
| 39988 | "#endif\n" |
| 39989 | "\n" |
| 39990 | "#if defined(cl_khr_int64_extended_atomics)\n" |
| 39991 | "long __ovld atom_min(volatile __global long *p, long val);\n" |
| 39992 | "unsigned long __ovld atom_min(volatile __global unsigned long *p, unsigned long val);\n" |
| 39993 | "long __ovld atom_min(volatile __local long *p, long val);\n" |
| 39994 | "unsigned long __ovld atom_min(volatile __local unsigned long *p, unsigned long val);\n" |
| 39995 | "#endif\n" |
| 39996 | "\n" |
| 39997 | "/**\n" |
| 39998 | " * Read the 32-bit value (referred to as old)\n" |
| 39999 | " * stored at location pointed by p. Compute\n" |
| 40000 | " * max(old, val) and store maximum value at\n" |
| 40001 | " * location pointed by p. The function\n" |
| 40002 | " * returns old.\n" |
| 40003 | " */\n" |
| 40004 | "int __ovld atomic_max(volatile __global int *p, int val);\n" |
| 40005 | "unsigned int __ovld atomic_max(volatile __global unsigned int *p, unsigned int val);\n" |
| 40006 | "int __ovld atomic_max(volatile __local int *p, int val);\n" |
| 40007 | "unsigned int __ovld atomic_max(volatile __local unsigned int *p, unsigned int val);\n" |
| 40008 | "\n" |
| 40009 | "#if defined(cl_khr_global_int32_extended_atomics)\n" |
| 40010 | "int __ovld atom_max(volatile __global int *p, int val);\n" |
| 40011 | "unsigned int __ovld atom_max(volatile __global unsigned int *p, unsigned int val);\n" |
| 40012 | "#endif\n" |
| 40013 | "#if defined(cl_khr_local_int32_extended_atomics)\n" |
| 40014 | "int __ovld atom_max(volatile __local int *p, int val);\n" |
| 40015 | "unsigned int __ovld atom_max(volatile __local unsigned int *p, unsigned int val);\n" |
| 40016 | "#endif\n" |
| 40017 | "\n" |
| 40018 | "#if defined(cl_khr_int64_extended_atomics)\n" |
| 40019 | "long __ovld atom_max(volatile __global long *p, long val);\n" |
| 40020 | "unsigned long __ovld atom_max(volatile __global unsigned long *p, unsigned long val);\n" |
| 40021 | "long __ovld atom_max(volatile __local long *p, long val);\n" |
| 40022 | "unsigned long __ovld atom_max(volatile __local unsigned long *p, unsigned long val);\n" |
| 40023 | "#endif\n" |
| 40024 | "\n" |
| 40025 | "/**\n" |
| 40026 | " * Read the 32-bit value (referred to as old)\n" |
| 40027 | " * stored at location pointed by p. Compute\n" |
| 40028 | " * (old & val) and store result at location\n" |
| 40029 | " * pointed by p. The function returns old.\n" |
| 40030 | " */\n" |
| 40031 | "int __ovld atomic_and(volatile __global int *p, int val);\n" |
| 40032 | "unsigned int __ovld atomic_and(volatile __global unsigned int *p, unsigned int val);\n" |
| 40033 | "int __ovld atomic_and(volatile __local int *p, int val);\n" |
| 40034 | "unsigned int __ovld atomic_and(volatile __local unsigned int *p, unsigned int val);\n" |
| 40035 | "\n" |
| 40036 | "#if defined(cl_khr_global_int32_extended_atomics)\n" |
| 40037 | "int __ovld atom_and(volatile __global int *p, int val);\n" |
| 40038 | "unsigned int __ovld atom_and(volatile __global unsigned int *p, unsigned int val);\n" |
| 40039 | "#endif\n" |
| 40040 | "#if defined(cl_khr_local_int32_extended_atomics)\n" |
| 40041 | "int __ovld atom_and(volatile __local int *p, int val);\n" |
| 40042 | "unsigned int __ovld atom_and(volatile __local unsigned int *p, unsigned int val);\n" |
| 40043 | "#endif\n" |
| 40044 | "\n" |
| 40045 | "#if defined(cl_khr_int64_extended_atomics)\n" |
| 40046 | "long __ovld atom_and(volatile __global long *p, long val);\n" |
| 40047 | "unsigned long __ovld atom_and(volatile __global unsigned long *p, unsigned long val);\n" |
| 40048 | "long __ovld atom_and(volatile __local long *p, long val);\n" |
| 40049 | "unsigned long __ovld atom_and(volatile __local unsigned long *p, unsigned long val);\n" |
| 40050 | "#endif\n" |
| 40051 | "\n" |
| 40052 | "/**\n" |
| 40053 | " * Read the 32-bit value (referred to as old)\n" |
| 40054 | " * stored at location pointed by p. Compute\n" |
| 40055 | " * (old | val) and store result at location\n" |
| 40056 | " * pointed by p. The function returns old.\n" |
| 40057 | " */\n" |
| 40058 | "int __ovld atomic_or(volatile __global int *p, int val);\n" |
| 40059 | "unsigned int __ovld atomic_or(volatile __global unsigned int *p, unsigned int val);\n" |
| 40060 | "int __ovld atomic_or(volatile __local int *p, int val);\n" |
| 40061 | "unsigned int __ovld atomic_or(volatile __local unsigned int *p, unsigned int val);\n" |
| 40062 | "\n" |
| 40063 | "#if defined(cl_khr_global_int32_extended_atomics)\n" |
| 40064 | "int __ovld atom_or(volatile __global int *p, int val);\n" |
| 40065 | "unsigned int __ovld atom_or(volatile __global unsigned int *p, unsigned int val);\n" |
| 40066 | "#endif\n" |
| 40067 | "#if defined(cl_khr_local_int32_extended_atomics)\n" |
| 40068 | "int __ovld atom_or(volatile __local int *p, int val);\n" |
| 40069 | "unsigned int __ovld atom_or(volatile __local unsigned int *p, unsigned int val);\n" |
| 40070 | "#endif\n" |
| 40071 | "\n" |
| 40072 | "#if defined(cl_khr_int64_extended_atomics)\n" |
| 40073 | "long __ovld atom_or(volatile __global long *p, long val);\n" |
| 40074 | "unsigned long __ovld atom_or(volatile __global unsigned long *p, unsigned long val);\n" |
| 40075 | "long __ovld atom_or(volatile __local long *p, long val);\n" |
| 40076 | "unsigned long __ovld atom_or(volatile __local unsigned long *p, unsigned long val);\n" |
| 40077 | "#endif\n" |
| 40078 | "\n" |
| 40079 | "/**\n" |
| 40080 | " * Read the 32-bit value (referred to as old)\n" |
| 40081 | " * stored at location pointed by p. Compute\n" |
| 40082 | " * (old ^ val) and store result at location\n" |
| 40083 | " * pointed by p. The function returns old.\n" |
| 40084 | " */\n" |
| 40085 | "int __ovld atomic_xor(volatile __global int *p, int val);\n" |
| 40086 | "unsigned int __ovld atomic_xor(volatile __global unsigned int *p, unsigned int val);\n" |
| 40087 | "int __ovld atomic_xor(volatile __local int *p, int val);\n" |
| 40088 | "unsigned int __ovld atomic_xor(volatile __local unsigned int *p, unsigned int val);\n" |
| 40089 | "\n" |
| 40090 | "#if defined(cl_khr_global_int32_extended_atomics)\n" |
| 40091 | "int __ovld atom_xor(volatile __global int *p, int val);\n" |
| 40092 | "unsigned int __ovld atom_xor(volatile __global unsigned int *p, unsigned int val);\n" |
| 40093 | "#endif\n" |
| 40094 | "#if defined(cl_khr_local_int32_extended_atomics)\n" |
| 40095 | "int __ovld atom_xor(volatile __local int *p, int val);\n" |
| 40096 | "unsigned int __ovld atom_xor(volatile __local unsigned int *p, unsigned int val);\n" |
| 40097 | "#endif\n" |
| 40098 | "\n" |
| 40099 | "#if defined(cl_khr_int64_extended_atomics)\n" |
| 40100 | "long __ovld atom_xor(volatile __global long *p, long val);\n" |
| 40101 | "unsigned long __ovld atom_xor(volatile __global unsigned long *p, unsigned long val);\n" |
| 40102 | "long __ovld atom_xor(volatile __local long *p, long val);\n" |
| 40103 | "unsigned long __ovld atom_xor(volatile __local unsigned long *p, unsigned long val);\n" |
| 40104 | "#endif\n" |
| 40105 | "\n" |
| 40106 | "#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n" |
| 40107 | "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : disable\n" |
| 40108 | "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : disable\n" |
| 40109 | "#endif\n" |
| 40110 | "\n" |
| 40111 | "// OpenCL v2.0 s6.13.11 - Atomics Functions\n" |
| 40112 | "\n" |
| 40113 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 40114 | "#ifndef ATOMIC_VAR_INIT\n" |
| 40115 | "#define ATOMIC_VAR_INIT(x) (x)\n" |
| 40116 | "#endif //ATOMIC_VAR_INIT\n" |
| 40117 | "#define ATOMIC_FLAG_INIT 0\n" |
| 40118 | "\n" |
| 40119 | "// enum values aligned with what clang uses in EmitAtomicExpr()\n" |
| 40120 | "typedef enum memory_order\n" |
| 40121 | "{\n" |
| 40122 | " memory_order_relaxed = __ATOMIC_RELAXED,\n" |
| 40123 | " memory_order_acquire = __ATOMIC_ACQUIRE,\n" |
| 40124 | " memory_order_release = __ATOMIC_RELEASE,\n" |
| 40125 | " memory_order_acq_rel = __ATOMIC_ACQ_REL,\n" |
| 40126 | " memory_order_seq_cst = __ATOMIC_SEQ_CST\n" |
| 40127 | "} memory_order;\n" |
| 40128 | "\n" |
| 40129 | "// double atomics support requires extensions cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics\n" |
| 40130 | "#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n" |
| 40131 | "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n" |
| 40132 | "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n" |
| 40133 | "#endif\n" |
| 40134 | "\n" |
| 40135 | "// atomic_init()\n" |
| 40136 | "void __ovld atomic_init(volatile atomic_int *object, int value);\n" |
| 40137 | "void __ovld atomic_init(volatile atomic_uint *object, uint value);\n" |
| 40138 | "void __ovld atomic_init(volatile atomic_float *object, float value);\n" |
| 40139 | "#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n" |
| 40140 | "void __ovld atomic_init(volatile atomic_long *object, long value);\n" |
| 40141 | "void __ovld atomic_init(volatile atomic_ulong *object, ulong value);\n" |
| 40142 | "#ifdef cl_khr_fp64\n" |
| 40143 | "void __ovld atomic_init(volatile atomic_double *object, double value);\n" |
| 40144 | "#endif //cl_khr_fp64\n" |
| 40145 | "#endif\n" |
| 40146 | "\n" |
| 40147 | "// atomic_work_item_fence()\n" |
| 40148 | "void __ovld atomic_work_item_fence(cl_mem_fence_flags flags, memory_order order, memory_scope scope);\n" |
| 40149 | "\n" |
| 40150 | "// atomic_fetch()\n" |
| 40151 | "\n" |
| 40152 | "int __ovld atomic_fetch_add(volatile atomic_int *object, int operand);\n" |
| 40153 | "int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, memory_order order);\n" |
| 40154 | "int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n" |
| 40155 | "uint __ovld atomic_fetch_add(volatile atomic_uint *object, uint operand);\n" |
| 40156 | "uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n" |
| 40157 | "uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n" |
| 40158 | "int __ovld atomic_fetch_sub(volatile atomic_int *object, int operand);\n" |
| 40159 | "int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, memory_order order);\n" |
| 40160 | "int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n" |
| 40161 | "uint __ovld atomic_fetch_sub(volatile atomic_uint *object, uint operand);\n" |
| 40162 | "uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n" |
| 40163 | "uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n" |
| 40164 | "int __ovld atomic_fetch_or(volatile atomic_int *object, int operand);\n" |
| 40165 | "int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, memory_order order);\n" |
| 40166 | "int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n" |
| 40167 | "uint __ovld atomic_fetch_or(volatile atomic_uint *object, uint operand);\n" |
| 40168 | "uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n" |
| 40169 | "uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n" |
| 40170 | "int __ovld atomic_fetch_xor(volatile atomic_int *object, int operand);\n" |
| 40171 | "int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, memory_order order);\n" |
| 40172 | "int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n" |
| 40173 | "uint __ovld atomic_fetch_xor(volatile atomic_uint *object, uint operand);\n" |
| 40174 | "uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n" |
| 40175 | "uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n" |
| 40176 | "int __ovld atomic_fetch_and(volatile atomic_int *object, int operand);\n" |
| 40177 | "int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, memory_order order);\n" |
| 40178 | "int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n" |
| 40179 | "uint __ovld atomic_fetch_and(volatile atomic_uint *object, uint operand);\n" |
| 40180 | "uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n" |
| 40181 | "uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n" |
| 40182 | "int __ovld atomic_fetch_min(volatile atomic_int *object, int operand);\n" |
| 40183 | "int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, memory_order order);\n" |
| 40184 | "int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n" |
| 40185 | "uint __ovld atomic_fetch_min(volatile atomic_uint *object, uint operand);\n" |
| 40186 | "uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n" |
| 40187 | "uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n" |
| 40188 | "uint __ovld atomic_fetch_min(volatile atomic_uint *object, int operand);\n" |
| 40189 | "uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, int operand, memory_order order);\n" |
| 40190 | "uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, int operand, memory_order order, memory_scope scope);\n" |
| 40191 | "int __ovld atomic_fetch_max(volatile atomic_int *object, int operand);\n" |
| 40192 | "int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, memory_order order);\n" |
| 40193 | "int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);\n" |
| 40194 | "uint __ovld atomic_fetch_max(volatile atomic_uint *object, uint operand);\n" |
| 40195 | "uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, uint operand, memory_order order);\n" |
| 40196 | "uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);\n" |
| 40197 | "uint __ovld atomic_fetch_max(volatile atomic_uint *object, int operand);\n" |
| 40198 | "uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, int operand, memory_order order);\n" |
| 40199 | "uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, int operand, memory_order order, memory_scope scope);\n" |
| 40200 | "\n" |
| 40201 | "#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n" |
| 40202 | "long __ovld atomic_fetch_add(volatile atomic_long *object, long operand);\n" |
| 40203 | "long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, long operand, memory_order order);\n" |
| 40204 | "long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n" |
| 40205 | "ulong __ovld atomic_fetch_add(volatile atomic_ulong *object, ulong operand);\n" |
| 40206 | "ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n" |
| 40207 | "ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n" |
| 40208 | "long __ovld atomic_fetch_sub(volatile atomic_long *object, long operand);\n" |
| 40209 | "long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, long operand, memory_order order);\n" |
| 40210 | "long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n" |
| 40211 | "ulong __ovld atomic_fetch_sub(volatile atomic_ulong *object, ulong operand);\n" |
| 40212 | "ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n" |
| 40213 | "ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n" |
| 40214 | "long __ovld atomic_fetch_or(volatile atomic_long *object, long operand);\n" |
| 40215 | "long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, memory_order order);\n" |
| 40216 | "long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n" |
| 40217 | "ulong __ovld atomic_fetch_or(volatile atomic_ulong *object, ulong operand);\n" |
| 40218 | "ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n" |
| 40219 | "ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n" |
| 40220 | "long __ovld atomic_fetch_xor(volatile atomic_long *object, long operand);\n" |
| 40221 | "long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, long operand, memory_order order);\n" |
| 40222 | "long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n" |
| 40223 | "ulong __ovld atomic_fetch_xor(volatile atomic_ulong *object, ulong operand);\n" |
| 40224 | "ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n" |
| 40225 | "ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n" |
| 40226 | "long __ovld atomic_fetch_and(volatile atomic_long *object, long operand);\n" |
| 40227 | "long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, long operand, memory_order order);\n" |
| 40228 | "long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n" |
| 40229 | "ulong __ovld atomic_fetch_and(volatile atomic_ulong *object, ulong operand);\n" |
| 40230 | "ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n" |
| 40231 | "ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n" |
| 40232 | "long __ovld atomic_fetch_min(volatile atomic_long *object, long operand);\n" |
| 40233 | "long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, long operand, memory_order order);\n" |
| 40234 | "long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n" |
| 40235 | "ulong __ovld atomic_fetch_min(volatile atomic_ulong *object, ulong operand);\n" |
| 40236 | "ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n" |
| 40237 | "ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n" |
| 40238 | "ulong __ovld atomic_fetch_min(volatile atomic_ulong *object, long operand);\n" |
| 40239 | "ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, long operand, memory_order order);\n" |
| 40240 | "ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, long operand, memory_order order, memory_scope scope);\n" |
| 40241 | "long __ovld atomic_fetch_max(volatile atomic_long *object, long operand);\n" |
| 40242 | "long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order);\n" |
| 40243 | "long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);\n" |
| 40244 | "ulong __ovld atomic_fetch_max(volatile atomic_ulong *object, ulong operand);\n" |
| 40245 | "ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);\n" |
| 40246 | "ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);\n" |
| 40247 | "ulong __ovld atomic_fetch_max(volatile atomic_ulong *object, long operand);\n" |
| 40248 | "ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long operand, memory_order order);\n" |
| 40249 | "ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long operand, memory_order order, memory_scope scope);\n" |
| 40250 | "#endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n" |
| 40251 | "\n" |
| 40252 | "// OpenCL v2.0 s6.13.11.7.5:\n" |
| 40253 | "// add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument can be ptrdiff_t.\n" |
| 40254 | "// or/xor/and/min/max: atomic type argument can be intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t.\n" |
| 40255 | "\n" |
| 40256 | "#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n" |
| 40257 | "uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t *object, ptrdiff_t operand);\n" |
| 40258 | "uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order);\n" |
| 40259 | "uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope);\n" |
| 40260 | "uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t *object, ptrdiff_t operand);\n" |
| 40261 | "uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order);\n" |
| 40262 | "uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope);\n" |
| 40263 | "\n" |
| 40264 | "uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t *object, intptr_t operand);\n" |
| 40265 | "uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order);\n" |
| 40266 | "uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);\n" |
| 40267 | "uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t *object, intptr_t operand);\n" |
| 40268 | "uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order);\n" |
| 40269 | "uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);\n" |
| 40270 | "uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t *object, intptr_t operand);\n" |
| 40271 | "uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order);\n" |
| 40272 | "uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);\n" |
| 40273 | "uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t *object, intptr_t opermax);\n" |
| 40274 | "uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder);\n" |
| 40275 | "uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder, memory_scope scope);\n" |
| 40276 | "uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t *object, intptr_t opermax);\n" |
| 40277 | "uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder);\n" |
| 40278 | "uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder, memory_scope scope);\n" |
| 40279 | "\n" |
| 40280 | "intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t *object, uintptr_t operand);\n" |
| 40281 | "intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order);\n" |
| 40282 | "intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);\n" |
| 40283 | "intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t *object, uintptr_t operand);\n" |
| 40284 | "intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order);\n" |
| 40285 | "intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);\n" |
| 40286 | "intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t *object, uintptr_t operand);\n" |
| 40287 | "intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order);\n" |
| 40288 | "intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);\n" |
| 40289 | "intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t *object, uintptr_t opermax);\n" |
| 40290 | "intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder);\n" |
| 40291 | "intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder, memory_scope scope);\n" |
| 40292 | "intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t *object, uintptr_t opermax);\n" |
| 40293 | "intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder);\n" |
| 40294 | "intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder, memory_scope scope);\n" |
| 40295 | "#endif\n" |
| 40296 | "\n" |
| 40297 | "// atomic_store()\n" |
| 40298 | "\n" |
| 40299 | "void __ovld atomic_store(volatile atomic_int *object, int desired);\n" |
| 40300 | "void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, memory_order order);\n" |
| 40301 | "void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, memory_order order, memory_scope scope);\n" |
| 40302 | "void __ovld atomic_store(volatile atomic_uint *object, uint desired);\n" |
| 40303 | "void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, memory_order order);\n" |
| 40304 | "void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, memory_order order, memory_scope scope);\n" |
| 40305 | "void __ovld atomic_store(volatile atomic_float *object, float desired);\n" |
| 40306 | "void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, memory_order order);\n" |
| 40307 | "void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, memory_order order, memory_scope scope);\n" |
| 40308 | "#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n" |
| 40309 | "#ifdef cl_khr_fp64\n" |
| 40310 | "void __ovld atomic_store(volatile atomic_double *object, double desired);\n" |
| 40311 | "void __ovld atomic_store_explicit(volatile atomic_double *object, double desired, memory_order order);\n" |
| 40312 | "void __ovld atomic_store_explicit(volatile atomic_double *object, double desired, memory_order order, memory_scope scope);\n" |
| 40313 | "#endif //cl_khr_fp64\n" |
| 40314 | "void __ovld atomic_store(volatile atomic_long *object, long desired);\n" |
| 40315 | "void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, memory_order order);\n" |
| 40316 | "void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, memory_order order, memory_scope scope);\n" |
| 40317 | "void __ovld atomic_store(volatile atomic_ulong *object, ulong desired);\n" |
| 40318 | "void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, memory_order order);\n" |
| 40319 | "void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, memory_order order, memory_scope scope);\n" |
| 40320 | "#endif\n" |
| 40321 | "\n" |
| 40322 | "// atomic_load()\n" |
| 40323 | "\n" |
| 40324 | "int __ovld atomic_load(volatile atomic_int *object);\n" |
| 40325 | "int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order);\n" |
| 40326 | "int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order, memory_scope scope);\n" |
| 40327 | "uint __ovld atomic_load(volatile atomic_uint *object);\n" |
| 40328 | "uint __ovld atomic_load_explicit(volatile atomic_uint *object, memory_order order);\n" |
| 40329 | "uint __ovld atomic_load_explicit(volatile atomic_uint *object, memory_order order, memory_scope scope);\n" |
| 40330 | "float __ovld atomic_load(volatile atomic_float *object);\n" |
| 40331 | "float __ovld atomic_load_explicit(volatile atomic_float *object, memory_order order);\n" |
| 40332 | "float __ovld atomic_load_explicit(volatile atomic_float *object, memory_order order, memory_scope scope);\n" |
| 40333 | "#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n" |
| 40334 | "#ifdef cl_khr_fp64\n" |
| 40335 | "double __ovld atomic_load(volatile atomic_double *object);\n" |
| 40336 | "double __ovld atomic_load_explicit(volatile atomic_double *object, memory_order order);\n" |
| 40337 | "double __ovld atomic_load_explicit(volatile atomic_double *object, memory_order order, memory_scope scope);\n" |
| 40338 | "#endif //cl_khr_fp64\n" |
| 40339 | "long __ovld atomic_load(volatile atomic_long *object);\n" |
| 40340 | "long __ovld atomic_load_explicit(volatile atomic_long *object, memory_order order);\n" |
| 40341 | "long __ovld atomic_load_explicit(volatile atomic_long *object, memory_order order, memory_scope scope);\n" |
| 40342 | "ulong __ovld atomic_load(volatile atomic_ulong *object);\n" |
| 40343 | "ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, memory_order order);\n" |
| 40344 | "ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, memory_order order, memory_scope scope);\n" |
| 40345 | "#endif\n" |
| 40346 | "\n" |
| 40347 | "// atomic_exchange()\n" |
| 40348 | "\n" |
| 40349 | "int __ovld atomic_exchange(volatile atomic_int *object, int desired);\n" |
| 40350 | "int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, memory_order order);\n" |
| 40351 | "int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, memory_order order, memory_scope scope);\n" |
| 40352 | "uint __ovld atomic_exchange(volatile atomic_uint *object, uint desired);\n" |
| 40353 | "uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, memory_order order);\n" |
| 40354 | "uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, memory_order order, memory_scope scope);\n" |
| 40355 | "float __ovld atomic_exchange(volatile atomic_float *object, float desired);\n" |
| 40356 | "float __ovld atomic_exchange_explicit(volatile atomic_float *object, float desired, memory_order order);\n" |
| 40357 | "float __ovld atomic_exchange_explicit(volatile atomic_float *object, float desired, memory_order order, memory_scope scope);\n" |
| 40358 | "#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n" |
| 40359 | "#ifdef cl_khr_fp64\n" |
| 40360 | "double __ovld atomic_exchange(volatile atomic_double *object, double desired);\n" |
| 40361 | "double __ovld atomic_exchange_explicit(volatile atomic_double *object, double desired, memory_order order);\n" |
| 40362 | "double __ovld atomic_exchange_explicit(volatile atomic_double *object, double desired, memory_order order, memory_scope scope);\n" |
| 40363 | "#endif //cl_khr_fp64\n" |
| 40364 | "long __ovld atomic_exchange(volatile atomic_long *object, long desired);\n" |
| 40365 | "long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, memory_order order);\n" |
| 40366 | "long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, memory_order order, memory_scope scope);\n" |
| 40367 | "ulong __ovld atomic_exchange(volatile atomic_ulong *object, ulong desired);\n" |
| 40368 | "ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ulong desired, memory_order order);\n" |
| 40369 | "ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ulong desired, memory_order order, memory_scope scope);\n" |
| 40370 | "#endif\n" |
| 40371 | "\n" |
| 40372 | "// atomic_compare_exchange_strong() and atomic_compare_exchange_weak()\n" |
| 40373 | "\n" |
| 40374 | "bool __ovld atomic_compare_exchange_strong(volatile atomic_int *object, int *expected, int desired);\n" |
| 40375 | "bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, int *expected,\n" |
| 40376 | " int desired, memory_order success, memory_order failure);\n" |
| 40377 | "bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, int *expected,\n" |
| 40378 | " int desired, memory_order success, memory_order failure, memory_scope scope);\n" |
| 40379 | "bool __ovld atomic_compare_exchange_strong(volatile atomic_uint *object, uint *expected, uint desired);\n" |
| 40380 | "bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *object, uint *expected,\n" |
| 40381 | " uint desired, memory_order success, memory_order failure);\n" |
| 40382 | "bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *object, uint *expected,\n" |
| 40383 | " uint desired, memory_order success, memory_order failure, memory_scope scope);\n" |
| 40384 | "bool __ovld atomic_compare_exchange_weak(volatile atomic_int *object, int *expected, int desired);\n" |
| 40385 | "bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, int *expected,\n" |
| 40386 | " int desired, memory_order success, memory_order failure);\n" |
| 40387 | "bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, int *expected,\n" |
| 40388 | " int desired, memory_order success, memory_order failure, memory_scope scope);\n" |
| 40389 | "bool __ovld atomic_compare_exchange_weak(volatile atomic_uint *object, uint *expected, uint desired);\n" |
| 40390 | "bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, uint *expected,\n" |
| 40391 | " uint desired, memory_order success, memory_order failure);\n" |
| 40392 | "bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, uint *expected,\n" |
| 40393 | " uint desired, memory_order success, memory_order failure, memory_scope scope);\n" |
| 40394 | "bool __ovld atomic_compare_exchange_strong(volatile atomic_float *object, float *expected, float desired);\n" |
| 40395 | "bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_float *object, float *expected,\n" |
| 40396 | " float desired, memory_order success, memory_order failure);\n" |
| 40397 | "bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_float *object, float *expected,\n" |
| 40398 | " float desired, memory_order success, memory_order failure, memory_scope scope);\n" |
| 40399 | "bool __ovld atomic_compare_exchange_weak(volatile atomic_float *object, float *expected, float desired);\n" |
| 40400 | "bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, float *expected,\n" |
| 40401 | " float desired, memory_order success, memory_order failure);\n" |
| 40402 | "bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, float *expected,\n" |
| 40403 | " float desired, memory_order success, memory_order failure, memory_scope scope);\n" |
| 40404 | "#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)\n" |
| 40405 | "#ifdef cl_khr_fp64\n" |
| 40406 | "bool __ovld atomic_compare_exchange_strong(volatile atomic_double *object, double *expected, double desired);\n" |
| 40407 | "bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *object, double *expected,\n" |
| 40408 | " double desired, memory_order success, memory_order failure);\n" |
| 40409 | "bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *object, double *expected,\n" |
| 40410 | " double desired, memory_order success, memory_order failure, memory_scope scope);\n" |
| 40411 | "bool __ovld atomic_compare_exchange_weak(volatile atomic_double *object, double *expected, double desired);\n" |
| 40412 | "bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *object, double *expected,\n" |
| 40413 | " double desired, memory_order success, memory_order failure);\n" |
| 40414 | "bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *object, double *expected,\n" |
| 40415 | " double desired, memory_order success, memory_order failure, memory_scope scope);\n" |
| 40416 | "#endif //cl_khr_fp64\n" |
| 40417 | "bool __ovld atomic_compare_exchange_strong(volatile atomic_long *object, long *expected, long desired);\n" |
| 40418 | "bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *object, long *expected,\n" |
| 40419 | " long desired, memory_order success, memory_order failure);\n" |
| 40420 | "bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *object, long *expected,\n" |
| 40421 | " long desired, memory_order success, memory_order failure, memory_scope scope);\n" |
| 40422 | "bool __ovld atomic_compare_exchange_weak(volatile atomic_long *object, long *expected, long desired);\n" |
| 40423 | "bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, long *expected,\n" |
| 40424 | " long desired, memory_order success, memory_order failure);\n" |
| 40425 | "bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, long *expected,\n" |
| 40426 | " long desired, memory_order success, memory_order failure, memory_scope scope);\n" |
| 40427 | "bool __ovld atomic_compare_exchange_strong(volatile atomic_ulong *object, ulong *expected, ulong desired);\n" |
| 40428 | "bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *object, ulong *expected,\n" |
| 40429 | " ulong desired, memory_order success, memory_order failure);\n" |
| 40430 | "bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *object, ulong *expected,\n" |
| 40431 | " ulong desired, memory_order success, memory_order failure, memory_scope scope);\n" |
| 40432 | "bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong *object, ulong *expected, ulong desired);\n" |
| 40433 | "bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ulong *expected,\n" |
| 40434 | " ulong desired, memory_order success, memory_order failure);\n" |
| 40435 | "bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ulong *expected,\n" |
| 40436 | " ulong desired, memory_order success, memory_order failure, memory_scope scope);\n" |
| 40437 | "#endif\n" |
| 40438 | "\n" |
| 40439 | "// atomic_flag_test_and_set() and atomic_flag_clear()\n" |
| 40440 | "\n" |
| 40441 | "bool __ovld atomic_flag_test_and_set(volatile atomic_flag *object);\n" |
| 40442 | "bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order);\n" |
| 40443 | "bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope);\n" |
| 40444 | "void __ovld atomic_flag_clear(volatile atomic_flag *object);\n" |
| 40445 | "void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order);\n" |
| 40446 | "void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope);\n" |
| 40447 | "\n" |
| 40448 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 40449 | "\n" |
| 40450 | "// OpenCL v1.1 s6.11.12, v1.2 s6.12.12, v2.0 s6.13.12 - Miscellaneous Vector Functions\n" |
| 40451 | "\n" |
| 40452 | "/**\n" |
| 40453 | " * The shuffle and shuffle2 built-in functions construct\n" |
| 40454 | " * a permutation of elements from one or two input\n" |
| 40455 | " * vectors respectively that are of the same type,\n" |
| 40456 | " * returning a vector with the same element type as the\n" |
| 40457 | " * input and length that is the same as the shuffle mask.\n" |
| 40458 | " * The size of each element in the mask must match the\n" |
| 40459 | " * size of each element in the result. For shuffle, only\n" |
| 40460 | " * the ilogb(2m-1) least significant bits of each mask\n" |
| 40461 | " * element are considered. For shuffle2, only the\n" |
| 40462 | " * ilogb(2m-1)+1 least significant bits of each mask\n" |
| 40463 | " * element are considered. Other bits in the mask shall\n" |
| 40464 | " * be ignored.\n" |
| 40465 | " * The elements of the input vectors are numbered from\n" |
| 40466 | " * left to right across one or both of the vectors. For this\n" |
| 40467 | " * purpose, the number of elements in a vector is given\n" |
| 40468 | " * by vec_step(gentypem). The shuffle mask operand\n" |
| 40469 | " * specifies, for each element of the result vector, which\n" |
| 40470 | " * element of the one or two input vectors the result\n" |
| 40471 | " * element gets.\n" |
| 40472 | " * Examples:\n" |
| 40473 | " * uint4 mask = (uint4)(3, 2,\n" |
| 40474 | " * 1, 0);\n" |
| 40475 | " * float4 a;\n" |
| 40476 | " * float4 r = shuffle(a, mask);\n" |
| 40477 | " * // r.s0123 = a.wzyx\n" |
| 40478 | " * uint8 mask = (uint8)(0, 1, 2, 3,\n" |
| 40479 | " * 4, 5, 6, 7);\n" |
| 40480 | " * float4 a, b;\n" |
| 40481 | " * float8 r = shuffle2(a, b, mask);\n" |
| 40482 | " * // r.s0123 = a.xyzw\n" |
| 40483 | " * // r.s4567 = b.xyzw\n" |
| 40484 | " * uint4 mask;\n" |
| 40485 | " * float8 a;\n" |
| 40486 | " * float4 b;\n" |
| 40487 | " * b = shuffle(a, mask);\n" |
| 40488 | " * Examples that are not valid are:\n" |
| 40489 | " * uint8 mask;\n" |
| 40490 | " * short16 a;\n" |
| 40491 | " * short8 b;\n" |
| 40492 | " * b = shuffle(a, mask); <- not valid\n" |
| 40493 | " */\n" |
| 40494 | "char2 __ovld __cnfn shuffle(char2 x, uchar2 mask);\n" |
| 40495 | "char2 __ovld __cnfn shuffle(char4 x, uchar2 mask);\n" |
| 40496 | "char2 __ovld __cnfn shuffle(char8 x, uchar2 mask);\n" |
| 40497 | "char2 __ovld __cnfn shuffle(char16 x, uchar2 mask);\n" |
| 40498 | "\n" |
| 40499 | "uchar2 __ovld __cnfn shuffle(uchar2 x, uchar2 mask);\n" |
| 40500 | "uchar2 __ovld __cnfn shuffle(uchar4 x, uchar2 mask);\n" |
| 40501 | "uchar2 __ovld __cnfn shuffle(uchar8 x, uchar2 mask);\n" |
| 40502 | "uchar2 __ovld __cnfn shuffle(uchar16 x, uchar2 mask);\n" |
| 40503 | "\n" |
| 40504 | "short2 __ovld __cnfn shuffle(short2 x, ushort2 mask);\n" |
| 40505 | "short2 __ovld __cnfn shuffle(short4 x, ushort2 mask);\n" |
| 40506 | "short2 __ovld __cnfn shuffle(short8 x, ushort2 mask);\n" |
| 40507 | "short2 __ovld __cnfn shuffle(short16 x, ushort2 mask);\n" |
| 40508 | "\n" |
| 40509 | "ushort2 __ovld __cnfn shuffle(ushort2 x, ushort2 mask);\n" |
| 40510 | "ushort2 __ovld __cnfn shuffle(ushort4 x, ushort2 mask);\n" |
| 40511 | "ushort2 __ovld __cnfn shuffle(ushort8 x, ushort2 mask);\n" |
| 40512 | "ushort2 __ovld __cnfn shuffle(ushort16 x, ushort2 mask);\n" |
| 40513 | "\n" |
| 40514 | "int2 __ovld __cnfn shuffle(int2 x, uint2 mask);\n" |
| 40515 | "int2 __ovld __cnfn shuffle(int4 x, uint2 mask);\n" |
| 40516 | "int2 __ovld __cnfn shuffle(int8 x, uint2 mask);\n" |
| 40517 | "int2 __ovld __cnfn shuffle(int16 x, uint2 mask);\n" |
| 40518 | "\n" |
| 40519 | "uint2 __ovld __cnfn shuffle(uint2 x, uint2 mask);\n" |
| 40520 | "uint2 __ovld __cnfn shuffle(uint4 x, uint2 mask);\n" |
| 40521 | "uint2 __ovld __cnfn shuffle(uint8 x, uint2 mask);\n" |
| 40522 | "uint2 __ovld __cnfn shuffle(uint16 x, uint2 mask);\n" |
| 40523 | "\n" |
| 40524 | "long2 __ovld __cnfn shuffle(long2 x, ulong2 mask);\n" |
| 40525 | "long2 __ovld __cnfn shuffle(long4 x, ulong2 mask);\n" |
| 40526 | "long2 __ovld __cnfn shuffle(long8 x, ulong2 mask);\n" |
| 40527 | "long2 __ovld __cnfn shuffle(long16 x, ulong2 mask);\n" |
| 40528 | "\n" |
| 40529 | "ulong2 __ovld __cnfn shuffle(ulong2 x, ulong2 mask);\n" |
| 40530 | "ulong2 __ovld __cnfn shuffle(ulong4 x, ulong2 mask);\n" |
| 40531 | "ulong2 __ovld __cnfn shuffle(ulong8 x, ulong2 mask);\n" |
| 40532 | "ulong2 __ovld __cnfn shuffle(ulong16 x, ulong2 mask);\n" |
| 40533 | "\n" |
| 40534 | "float2 __ovld __cnfn shuffle(float2 x, uint2 mask);\n" |
| 40535 | "float2 __ovld __cnfn shuffle(float4 x, uint2 mask);\n" |
| 40536 | "float2 __ovld __cnfn shuffle(float8 x, uint2 mask);\n" |
| 40537 | "float2 __ovld __cnfn shuffle(float16 x, uint2 mask);\n" |
| 40538 | "\n" |
| 40539 | "char4 __ovld __cnfn shuffle(char2 x, uchar4 mask);\n" |
| 40540 | "char4 __ovld __cnfn shuffle(char4 x, uchar4 mask);\n" |
| 40541 | "char4 __ovld __cnfn shuffle(char8 x, uchar4 mask);\n" |
| 40542 | "char4 __ovld __cnfn shuffle(char16 x, uchar4 mask);\n" |
| 40543 | "\n" |
| 40544 | "uchar4 __ovld __cnfn shuffle(uchar2 x, uchar4 mask);\n" |
| 40545 | "uchar4 __ovld __cnfn shuffle(uchar4 x, uchar4 mask);\n" |
| 40546 | "uchar4 __ovld __cnfn shuffle(uchar8 x, uchar4 mask);\n" |
| 40547 | "uchar4 __ovld __cnfn shuffle(uchar16 x, uchar4 mask);\n" |
| 40548 | "\n" |
| 40549 | "short4 __ovld __cnfn shuffle(short2 x, ushort4 mask);\n" |
| 40550 | "short4 __ovld __cnfn shuffle(short4 x, ushort4 mask);\n" |
| 40551 | "short4 __ovld __cnfn shuffle(short8 x, ushort4 mask);\n" |
| 40552 | "short4 __ovld __cnfn shuffle(short16 x, ushort4 mask);\n" |
| 40553 | "\n" |
| 40554 | "ushort4 __ovld __cnfn shuffle(ushort2 x, ushort4 mask);\n" |
| 40555 | "ushort4 __ovld __cnfn shuffle(ushort4 x, ushort4 mask);\n" |
| 40556 | "ushort4 __ovld __cnfn shuffle(ushort8 x, ushort4 mask);\n" |
| 40557 | "ushort4 __ovld __cnfn shuffle(ushort16 x, ushort4 mask);\n" |
| 40558 | "\n" |
| 40559 | "int4 __ovld __cnfn shuffle(int2 x, uint4 mask);\n" |
| 40560 | "int4 __ovld __cnfn shuffle(int4 x, uint4 mask);\n" |
| 40561 | "int4 __ovld __cnfn shuffle(int8 x, uint4 mask);\n" |
| 40562 | "int4 __ovld __cnfn shuffle(int16 x, uint4 mask);\n" |
| 40563 | "\n" |
| 40564 | "uint4 __ovld __cnfn shuffle(uint2 x, uint4 mask);\n" |
| 40565 | "uint4 __ovld __cnfn shuffle(uint4 x, uint4 mask);\n" |
| 40566 | "uint4 __ovld __cnfn shuffle(uint8 x, uint4 mask);\n" |
| 40567 | "uint4 __ovld __cnfn shuffle(uint16 x, uint4 mask);\n" |
| 40568 | "\n" |
| 40569 | "long4 __ovld __cnfn shuffle(long2 x, ulong4 mask);\n" |
| 40570 | "long4 __ovld __cnfn shuffle(long4 x, ulong4 mask);\n" |
| 40571 | "long4 __ovld __cnfn shuffle(long8 x, ulong4 mask);\n" |
| 40572 | "long4 __ovld __cnfn shuffle(long16 x, ulong4 mask);\n" |
| 40573 | "\n" |
| 40574 | "ulong4 __ovld __cnfn shuffle(ulong2 x, ulong4 mask);\n" |
| 40575 | "ulong4 __ovld __cnfn shuffle(ulong4 x, ulong4 mask);\n" |
| 40576 | "ulong4 __ovld __cnfn shuffle(ulong8 x, ulong4 mask);\n" |
| 40577 | "ulong4 __ovld __cnfn shuffle(ulong16 x, ulong4 mask);\n" |
| 40578 | "\n" |
| 40579 | "float4 __ovld __cnfn shuffle(float2 x, uint4 mask);\n" |
| 40580 | "float4 __ovld __cnfn shuffle(float4 x, uint4 mask);\n" |
| 40581 | "float4 __ovld __cnfn shuffle(float8 x, uint4 mask);\n" |
| 40582 | "float4 __ovld __cnfn shuffle(float16 x, uint4 mask);\n" |
| 40583 | "\n" |
| 40584 | "char8 __ovld __cnfn shuffle(char2 x, uchar8 mask);\n" |
| 40585 | "char8 __ovld __cnfn shuffle(char4 x, uchar8 mask);\n" |
| 40586 | "char8 __ovld __cnfn shuffle(char8 x, uchar8 mask);\n" |
| 40587 | "char8 __ovld __cnfn shuffle(char16 x, uchar8 mask);\n" |
| 40588 | "\n" |
| 40589 | "uchar8 __ovld __cnfn shuffle(uchar2 x, uchar8 mask);\n" |
| 40590 | "uchar8 __ovld __cnfn shuffle(uchar4 x, uchar8 mask);\n" |
| 40591 | "uchar8 __ovld __cnfn shuffle(uchar8 x, uchar8 mask);\n" |
| 40592 | "uchar8 __ovld __cnfn shuffle(uchar16 x, uchar8 mask);\n" |
| 40593 | "\n" |
| 40594 | "short8 __ovld __cnfn shuffle(short2 x, ushort8 mask);\n" |
| 40595 | "short8 __ovld __cnfn shuffle(short4 x, ushort8 mask);\n" |
| 40596 | "short8 __ovld __cnfn shuffle(short8 x, ushort8 mask);\n" |
| 40597 | "short8 __ovld __cnfn shuffle(short16 x, ushort8 mask);\n" |
| 40598 | "\n" |
| 40599 | "ushort8 __ovld __cnfn shuffle(ushort2 x, ushort8 mask);\n" |
| 40600 | "ushort8 __ovld __cnfn shuffle(ushort4 x, ushort8 mask);\n" |
| 40601 | "ushort8 __ovld __cnfn shuffle(ushort8 x, ushort8 mask);\n" |
| 40602 | "ushort8 __ovld __cnfn shuffle(ushort16 x, ushort8 mask);\n" |
| 40603 | "\n" |
| 40604 | "int8 __ovld __cnfn shuffle(int2 x, uint8 mask);\n" |
| 40605 | "int8 __ovld __cnfn shuffle(int4 x, uint8 mask);\n" |
| 40606 | "int8 __ovld __cnfn shuffle(int8 x, uint8 mask);\n" |
| 40607 | "int8 __ovld __cnfn shuffle(int16 x, uint8 mask);\n" |
| 40608 | "\n" |
| 40609 | "uint8 __ovld __cnfn shuffle(uint2 x, uint8 mask);\n" |
| 40610 | "uint8 __ovld __cnfn shuffle(uint4 x, uint8 mask);\n" |
| 40611 | "uint8 __ovld __cnfn shuffle(uint8 x, uint8 mask);\n" |
| 40612 | "uint8 __ovld __cnfn shuffle(uint16 x, uint8 mask);\n" |
| 40613 | "\n" |
| 40614 | "long8 __ovld __cnfn shuffle(long2 x, ulong8 mask);\n" |
| 40615 | "long8 __ovld __cnfn shuffle(long4 x, ulong8 mask);\n" |
| 40616 | "long8 __ovld __cnfn shuffle(long8 x, ulong8 mask);\n" |
| 40617 | "long8 __ovld __cnfn shuffle(long16 x, ulong8 mask);\n" |
| 40618 | "\n" |
| 40619 | "ulong8 __ovld __cnfn shuffle(ulong2 x, ulong8 mask);\n" |
| 40620 | "ulong8 __ovld __cnfn shuffle(ulong4 x, ulong8 mask);\n" |
| 40621 | "ulong8 __ovld __cnfn shuffle(ulong8 x, ulong8 mask);\n" |
| 40622 | "ulong8 __ovld __cnfn shuffle(ulong16 x, ulong8 mask);\n" |
| 40623 | "\n" |
| 40624 | "float8 __ovld __cnfn shuffle(float2 x, uint8 mask);\n" |
| 40625 | "float8 __ovld __cnfn shuffle(float4 x, uint8 mask);\n" |
| 40626 | "float8 __ovld __cnfn shuffle(float8 x, uint8 mask);\n" |
| 40627 | "float8 __ovld __cnfn shuffle(float16 x, uint8 mask);\n" |
| 40628 | "\n" |
| 40629 | "char16 __ovld __cnfn shuffle(char2 x, uchar16 mask);\n" |
| 40630 | "char16 __ovld __cnfn shuffle(char4 x, uchar16 mask);\n" |
| 40631 | "char16 __ovld __cnfn shuffle(char8 x, uchar16 mask);\n" |
| 40632 | "char16 __ovld __cnfn shuffle(char16 x, uchar16 mask);\n" |
| 40633 | "\n" |
| 40634 | "uchar16 __ovld __cnfn shuffle(uchar2 x, uchar16 mask);\n" |
| 40635 | "uchar16 __ovld __cnfn shuffle(uchar4 x, uchar16 mask);\n" |
| 40636 | "uchar16 __ovld __cnfn shuffle(uchar8 x, uchar16 mask);\n" |
| 40637 | "uchar16 __ovld __cnfn shuffle(uchar16 x, uchar16 mask);\n" |
| 40638 | "\n" |
| 40639 | "short16 __ovld __cnfn shuffle(short2 x, ushort16 mask);\n" |
| 40640 | "short16 __ovld __cnfn shuffle(short4 x, ushort16 mask);\n" |
| 40641 | "short16 __ovld __cnfn shuffle(short8 x, ushort16 mask);\n" |
| 40642 | "short16 __ovld __cnfn shuffle(short16 x, ushort16 mask);\n" |
| 40643 | "\n" |
| 40644 | "ushort16 __ovld __cnfn shuffle(ushort2 x, ushort16 mask);\n" |
| 40645 | "ushort16 __ovld __cnfn shuffle(ushort4 x, ushort16 mask);\n" |
| 40646 | "ushort16 __ovld __cnfn shuffle(ushort8 x, ushort16 mask);\n" |
| 40647 | "ushort16 __ovld __cnfn shuffle(ushort16 x, ushort16 mask);\n" |
| 40648 | "\n" |
| 40649 | "int16 __ovld __cnfn shuffle(int2 x, uint16 mask);\n" |
| 40650 | "int16 __ovld __cnfn shuffle(int4 x, uint16 mask);\n" |
| 40651 | "int16 __ovld __cnfn shuffle(int8 x, uint16 mask);\n" |
| 40652 | "int16 __ovld __cnfn shuffle(int16 x, uint16 mask);\n" |
| 40653 | "\n" |
| 40654 | "uint16 __ovld __cnfn shuffle(uint2 x, uint16 mask);\n" |
| 40655 | "uint16 __ovld __cnfn shuffle(uint4 x, uint16 mask);\n" |
| 40656 | "uint16 __ovld __cnfn shuffle(uint8 x, uint16 mask);\n" |
| 40657 | "uint16 __ovld __cnfn shuffle(uint16 x, uint16 mask);\n" |
| 40658 | "\n" |
| 40659 | "long16 __ovld __cnfn shuffle(long2 x, ulong16 mask);\n" |
| 40660 | "long16 __ovld __cnfn shuffle(long4 x, ulong16 mask);\n" |
| 40661 | "long16 __ovld __cnfn shuffle(long8 x, ulong16 mask);\n" |
| 40662 | "long16 __ovld __cnfn shuffle(long16 x, ulong16 mask);\n" |
| 40663 | "\n" |
| 40664 | "ulong16 __ovld __cnfn shuffle(ulong2 x, ulong16 mask);\n" |
| 40665 | "ulong16 __ovld __cnfn shuffle(ulong4 x, ulong16 mask);\n" |
| 40666 | "ulong16 __ovld __cnfn shuffle(ulong8 x, ulong16 mask);\n" |
| 40667 | "ulong16 __ovld __cnfn shuffle(ulong16 x, ulong16 mask);\n" |
| 40668 | "\n" |
| 40669 | "float16 __ovld __cnfn shuffle(float2 x, uint16 mask);\n" |
| 40670 | "float16 __ovld __cnfn shuffle(float4 x, uint16 mask);\n" |
| 40671 | "float16 __ovld __cnfn shuffle(float8 x, uint16 mask);\n" |
| 40672 | "float16 __ovld __cnfn shuffle(float16 x, uint16 mask);\n" |
| 40673 | "\n" |
| 40674 | "#ifdef cl_khr_fp64\n" |
| 40675 | "double2 __ovld __cnfn shuffle(double2 x, ulong2 mask);\n" |
| 40676 | "double2 __ovld __cnfn shuffle(double4 x, ulong2 mask);\n" |
| 40677 | "double2 __ovld __cnfn shuffle(double8 x, ulong2 mask);\n" |
| 40678 | "double2 __ovld __cnfn shuffle(double16 x, ulong2 mask);\n" |
| 40679 | "\n" |
| 40680 | "double4 __ovld __cnfn shuffle(double2 x, ulong4 mask);\n" |
| 40681 | "double4 __ovld __cnfn shuffle(double4 x, ulong4 mask);\n" |
| 40682 | "double4 __ovld __cnfn shuffle(double8 x, ulong4 mask);\n" |
| 40683 | "double4 __ovld __cnfn shuffle(double16 x, ulong4 mask);\n" |
| 40684 | "\n" |
| 40685 | "double8 __ovld __cnfn shuffle(double2 x, ulong8 mask);\n" |
| 40686 | "double8 __ovld __cnfn shuffle(double4 x, ulong8 mask);\n" |
| 40687 | "double8 __ovld __cnfn shuffle(double8 x, ulong8 mask);\n" |
| 40688 | "double8 __ovld __cnfn shuffle(double16 x, ulong8 mask);\n" |
| 40689 | "\n" |
| 40690 | "double16 __ovld __cnfn shuffle(double2 x, ulong16 mask);\n" |
| 40691 | "double16 __ovld __cnfn shuffle(double4 x, ulong16 mask);\n" |
| 40692 | "double16 __ovld __cnfn shuffle(double8 x, ulong16 mask);\n" |
| 40693 | "double16 __ovld __cnfn shuffle(double16 x, ulong16 mask);\n" |
| 40694 | "#endif //cl_khr_fp64\n" |
| 40695 | "\n" |
| 40696 | "#ifdef cl_khr_fp16\n" |
| 40697 | "half2 __ovld __cnfn shuffle(half2 x, ushort2 mask);\n" |
| 40698 | "half2 __ovld __cnfn shuffle(half4 x, ushort2 mask);\n" |
| 40699 | "half2 __ovld __cnfn shuffle(half8 x, ushort2 mask);\n" |
| 40700 | "half2 __ovld __cnfn shuffle(half16 x, ushort2 mask);\n" |
| 40701 | "\n" |
| 40702 | "half4 __ovld __cnfn shuffle(half2 x, ushort4 mask);\n" |
| 40703 | "half4 __ovld __cnfn shuffle(half4 x, ushort4 mask);\n" |
| 40704 | "half4 __ovld __cnfn shuffle(half8 x, ushort4 mask);\n" |
| 40705 | "half4 __ovld __cnfn shuffle(half16 x, ushort4 mask);\n" |
| 40706 | "\n" |
| 40707 | "half8 __ovld __cnfn shuffle(half2 x, ushort8 mask);\n" |
| 40708 | "half8 __ovld __cnfn shuffle(half4 x, ushort8 mask);\n" |
| 40709 | "half8 __ovld __cnfn shuffle(half8 x, ushort8 mask);\n" |
| 40710 | "half8 __ovld __cnfn shuffle(half16 x, ushort8 mask);\n" |
| 40711 | "\n" |
| 40712 | "half16 __ovld __cnfn shuffle(half2 x, ushort16 mask);\n" |
| 40713 | "half16 __ovld __cnfn shuffle(half4 x, ushort16 mask);\n" |
| 40714 | "half16 __ovld __cnfn shuffle(half8 x, ushort16 mask);\n" |
| 40715 | "half16 __ovld __cnfn shuffle(half16 x, ushort16 mask);\n" |
| 40716 | "#endif //cl_khr_fp16\n" |
| 40717 | "\n" |
| 40718 | "char2 __ovld __cnfn shuffle2(char2 x, char2 y, uchar2 mask);\n" |
| 40719 | "char2 __ovld __cnfn shuffle2(char4 x, char4 y, uchar2 mask);\n" |
| 40720 | "char2 __ovld __cnfn shuffle2(char8 x, char8 y, uchar2 mask);\n" |
| 40721 | "char2 __ovld __cnfn shuffle2(char16 x, char16 y, uchar2 mask);\n" |
| 40722 | "\n" |
| 40723 | "uchar2 __ovld __cnfn shuffle2(uchar2 x, uchar2 y, uchar2 mask);\n" |
| 40724 | "uchar2 __ovld __cnfn shuffle2(uchar4 x, uchar4 y, uchar2 mask);\n" |
| 40725 | "uchar2 __ovld __cnfn shuffle2(uchar8 x, uchar8 y, uchar2 mask);\n" |
| 40726 | "uchar2 __ovld __cnfn shuffle2(uchar16 x, uchar16 y, uchar2 mask);\n" |
| 40727 | "\n" |
| 40728 | "short2 __ovld __cnfn shuffle2(short2 x, short2 y, ushort2 mask);\n" |
| 40729 | "short2 __ovld __cnfn shuffle2(short4 x, short4 y, ushort2 mask);\n" |
| 40730 | "short2 __ovld __cnfn shuffle2(short8 x, short8 y, ushort2 mask);\n" |
| 40731 | "short2 __ovld __cnfn shuffle2(short16 x, short16 y, ushort2 mask);\n" |
| 40732 | "\n" |
| 40733 | "ushort2 __ovld __cnfn shuffle2(ushort2 x, ushort2 y, ushort2 mask);\n" |
| 40734 | "ushort2 __ovld __cnfn shuffle2(ushort4 x, ushort4 y, ushort2 mask);\n" |
| 40735 | "ushort2 __ovld __cnfn shuffle2(ushort8 x, ushort8 y, ushort2 mask);\n" |
| 40736 | "ushort2 __ovld __cnfn shuffle2(ushort16 x, ushort16 y, ushort2 mask);\n" |
| 40737 | "\n" |
| 40738 | "int2 __ovld __cnfn shuffle2(int2 x, int2 y, uint2 mask);\n" |
| 40739 | "int2 __ovld __cnfn shuffle2(int4 x, int4 y, uint2 mask);\n" |
| 40740 | "int2 __ovld __cnfn shuffle2(int8 x, int8 y, uint2 mask);\n" |
| 40741 | "int2 __ovld __cnfn shuffle2(int16 x, int16 y, uint2 mask);\n" |
| 40742 | "\n" |
| 40743 | "uint2 __ovld __cnfn shuffle2(uint2 x, uint2 y, uint2 mask);\n" |
| 40744 | "uint2 __ovld __cnfn shuffle2(uint4 x, uint4 y, uint2 mask);\n" |
| 40745 | "uint2 __ovld __cnfn shuffle2(uint8 x, uint8 y, uint2 mask);\n" |
| 40746 | "uint2 __ovld __cnfn shuffle2(uint16 x, uint16 y, uint2 mask);\n" |
| 40747 | "\n" |
| 40748 | "long2 __ovld __cnfn shuffle2(long2 x, long2 y, ulong2 mask);\n" |
| 40749 | "long2 __ovld __cnfn shuffle2(long4 x, long4 y, ulong2 mask);\n" |
| 40750 | "long2 __ovld __cnfn shuffle2(long8 x, long8 y, ulong2 mask);\n" |
| 40751 | "long2 __ovld __cnfn shuffle2(long16 x, long16 y, ulong2 mask);\n" |
| 40752 | "\n" |
| 40753 | "ulong2 __ovld __cnfn shuffle2(ulong2 x, ulong2 y, ulong2 mask);\n" |
| 40754 | "ulong2 __ovld __cnfn shuffle2(ulong4 x, ulong4 y, ulong2 mask);\n" |
| 40755 | "ulong2 __ovld __cnfn shuffle2(ulong8 x, ulong8 y, ulong2 mask);\n" |
| 40756 | "ulong2 __ovld __cnfn shuffle2(ulong16 x, ulong16 y, ulong2 mask);\n" |
| 40757 | "\n" |
| 40758 | "float2 __ovld __cnfn shuffle2(float2 x, float2 y, uint2 mask);\n" |
| 40759 | "float2 __ovld __cnfn shuffle2(float4 x, float4 y, uint2 mask);\n" |
| 40760 | "float2 __ovld __cnfn shuffle2(float8 x, float8 y, uint2 mask);\n" |
| 40761 | "float2 __ovld __cnfn shuffle2(float16 x, float16 y, uint2 mask);\n" |
| 40762 | "\n" |
| 40763 | "char4 __ovld __cnfn shuffle2(char2 x, char2 y, uchar4 mask);\n" |
| 40764 | "char4 __ovld __cnfn shuffle2(char4 x, char4 y, uchar4 mask);\n" |
| 40765 | "char4 __ovld __cnfn shuffle2(char8 x, char8 y, uchar4 mask);\n" |
| 40766 | "char4 __ovld __cnfn shuffle2(char16 x, char16 y, uchar4 mask);\n" |
| 40767 | "\n" |
| 40768 | "uchar4 __ovld __cnfn shuffle2(uchar2 x, uchar2 y, uchar4 mask);\n" |
| 40769 | "uchar4 __ovld __cnfn shuffle2(uchar4 x, uchar4 y, uchar4 mask);\n" |
| 40770 | "uchar4 __ovld __cnfn shuffle2(uchar8 x, uchar8 y, uchar4 mask);\n" |
| 40771 | "uchar4 __ovld __cnfn shuffle2(uchar16 x, uchar16 y, uchar4 mask);\n" |
| 40772 | "\n" |
| 40773 | "short4 __ovld __cnfn shuffle2(short2 x, short2 y, ushort4 mask);\n" |
| 40774 | "short4 __ovld __cnfn shuffle2(short4 x, short4 y, ushort4 mask);\n" |
| 40775 | "short4 __ovld __cnfn shuffle2(short8 x, short8 y, ushort4 mask);\n" |
| 40776 | "short4 __ovld __cnfn shuffle2(short16 x, short16 y, ushort4 mask);\n" |
| 40777 | "\n" |
| 40778 | "ushort4 __ovld __cnfn shuffle2(ushort2 x, ushort2 y, ushort4 mask);\n" |
| 40779 | "ushort4 __ovld __cnfn shuffle2(ushort4 x, ushort4 y, ushort4 mask);\n" |
| 40780 | "ushort4 __ovld __cnfn shuffle2(ushort8 x, ushort8 y, ushort4 mask);\n" |
| 40781 | "ushort4 __ovld __cnfn shuffle2(ushort16 x, ushort16 y, ushort4 mask);\n" |
| 40782 | "\n" |
| 40783 | "int4 __ovld __cnfn shuffle2(int2 x, int2 y, uint4 mask);\n" |
| 40784 | "int4 __ovld __cnfn shuffle2(int4 x, int4 y, uint4 mask);\n" |
| 40785 | "int4 __ovld __cnfn shuffle2(int8 x, int8 y, uint4 mask);\n" |
| 40786 | "int4 __ovld __cnfn shuffle2(int16 x, int16 y, uint4 mask);\n" |
| 40787 | "\n" |
| 40788 | "uint4 __ovld __cnfn shuffle2(uint2 x, uint2 y, uint4 mask);\n" |
| 40789 | "uint4 __ovld __cnfn shuffle2(uint4 x, uint4 y, uint4 mask);\n" |
| 40790 | "uint4 __ovld __cnfn shuffle2(uint8 x, uint8 y, uint4 mask);\n" |
| 40791 | "uint4 __ovld __cnfn shuffle2(uint16 x, uint16 y, uint4 mask);\n" |
| 40792 | "\n" |
| 40793 | "long4 __ovld __cnfn shuffle2(long2 x, long2 y, ulong4 mask);\n" |
| 40794 | "long4 __ovld __cnfn shuffle2(long4 x, long4 y, ulong4 mask);\n" |
| 40795 | "long4 __ovld __cnfn shuffle2(long8 x, long8 y, ulong4 mask);\n" |
| 40796 | "long4 __ovld __cnfn shuffle2(long16 x, long16 y, ulong4 mask);\n" |
| 40797 | "\n" |
| 40798 | "ulong4 __ovld __cnfn shuffle2(ulong2 x, ulong2 y, ulong4 mask);\n" |
| 40799 | "ulong4 __ovld __cnfn shuffle2(ulong4 x, ulong4 y, ulong4 mask);\n" |
| 40800 | "ulong4 __ovld __cnfn shuffle2(ulong8 x, ulong8 y, ulong4 mask);\n" |
| 40801 | "ulong4 __ovld __cnfn shuffle2(ulong16 x, ulong16 y, ulong4 mask);\n" |
| 40802 | "\n" |
| 40803 | "float4 __ovld __cnfn shuffle2(float2 x, float2 y, uint4 mask);\n" |
| 40804 | "float4 __ovld __cnfn shuffle2(float4 x, float4 y, uint4 mask);\n" |
| 40805 | "float4 __ovld __cnfn shuffle2(float8 x, float8 y, uint4 mask);\n" |
| 40806 | "float4 __ovld __cnfn shuffle2(float16 x, float16 y, uint4 mask);\n" |
| 40807 | "\n" |
| 40808 | "char8 __ovld __cnfn shuffle2(char2 x, char2 y, uchar8 mask);\n" |
| 40809 | "char8 __ovld __cnfn shuffle2(char4 x, char4 y, uchar8 mask);\n" |
| 40810 | "char8 __ovld __cnfn shuffle2(char8 x, char8 y, uchar8 mask);\n" |
| 40811 | "char8 __ovld __cnfn shuffle2(char16 x, char16 y, uchar8 mask);\n" |
| 40812 | "\n" |
| 40813 | "uchar8 __ovld __cnfn shuffle2(uchar2 x, uchar2 y, uchar8 mask);\n" |
| 40814 | "uchar8 __ovld __cnfn shuffle2(uchar4 x, uchar4 y, uchar8 mask);\n" |
| 40815 | "uchar8 __ovld __cnfn shuffle2(uchar8 x, uchar8 y, uchar8 mask);\n" |
| 40816 | "uchar8 __ovld __cnfn shuffle2(uchar16 x, uchar16 y, uchar8 mask);\n" |
| 40817 | "\n" |
| 40818 | "short8 __ovld __cnfn shuffle2(short2 x, short2 y, ushort8 mask);\n" |
| 40819 | "short8 __ovld __cnfn shuffle2(short4 x, short4 y, ushort8 mask);\n" |
| 40820 | "short8 __ovld __cnfn shuffle2(short8 x, short8 y, ushort8 mask);\n" |
| 40821 | "short8 __ovld __cnfn shuffle2(short16 x, short16 y, ushort8 mask);\n" |
| 40822 | "\n" |
| 40823 | "ushort8 __ovld __cnfn shuffle2(ushort2 x, ushort2 y, ushort8 mask);\n" |
| 40824 | "ushort8 __ovld __cnfn shuffle2(ushort4 x, ushort4 y, ushort8 mask);\n" |
| 40825 | "ushort8 __ovld __cnfn shuffle2(ushort8 x, ushort8 y, ushort8 mask);\n" |
| 40826 | "ushort8 __ovld __cnfn shuffle2(ushort16 x, ushort16 y, ushort8 mask);\n" |
| 40827 | "\n" |
| 40828 | "int8 __ovld __cnfn shuffle2(int2 x, int2 y, uint8 mask);\n" |
| 40829 | "int8 __ovld __cnfn shuffle2(int4 x, int4 y, uint8 mask);\n" |
| 40830 | "int8 __ovld __cnfn shuffle2(int8 x, int8 y, uint8 mask);\n" |
| 40831 | "int8 __ovld __cnfn shuffle2(int16 x, int16 y, uint8 mask);\n" |
| 40832 | "\n" |
| 40833 | "uint8 __ovld __cnfn shuffle2(uint2 x, uint2 y, uint8 mask);\n" |
| 40834 | "uint8 __ovld __cnfn shuffle2(uint4 x, uint4 y, uint8 mask);\n" |
| 40835 | "uint8 __ovld __cnfn shuffle2(uint8 x, uint8 y, uint8 mask);\n" |
| 40836 | "uint8 __ovld __cnfn shuffle2(uint16 x, uint16 y, uint8 mask);\n" |
| 40837 | "\n" |
| 40838 | "long8 __ovld __cnfn shuffle2(long2 x, long2 y, ulong8 mask);\n" |
| 40839 | "long8 __ovld __cnfn shuffle2(long4 x, long4 y, ulong8 mask);\n" |
| 40840 | "long8 __ovld __cnfn shuffle2(long8 x, long8 y, ulong8 mask);\n" |
| 40841 | "long8 __ovld __cnfn shuffle2(long16 x, long16 y, ulong8 mask);\n" |
| 40842 | "\n" |
| 40843 | "ulong8 __ovld __cnfn shuffle2(ulong2 x, ulong2 y, ulong8 mask);\n" |
| 40844 | "ulong8 __ovld __cnfn shuffle2(ulong4 x, ulong4 y, ulong8 mask);\n" |
| 40845 | "ulong8 __ovld __cnfn shuffle2(ulong8 x, ulong8 y, ulong8 mask);\n" |
| 40846 | "ulong8 __ovld __cnfn shuffle2(ulong16 x, ulong16 y, ulong8 mask);\n" |
| 40847 | "\n" |
| 40848 | "float8 __ovld __cnfn shuffle2(float2 x, float2 y, uint8 mask);\n" |
| 40849 | "float8 __ovld __cnfn shuffle2(float4 x, float4 y, uint8 mask);\n" |
| 40850 | "float8 __ovld __cnfn shuffle2(float8 x, float8 y, uint8 mask);\n" |
| 40851 | "float8 __ovld __cnfn shuffle2(float16 x, float16 y, uint8 mask);\n" |
| 40852 | "\n" |
| 40853 | "char16 __ovld __cnfn shuffle2(char2 x, char2 y, uchar16 mask);\n" |
| 40854 | "char16 __ovld __cnfn shuffle2(char4 x, char4 y, uchar16 mask);\n" |
| 40855 | "char16 __ovld __cnfn shuffle2(char8 x, char8 y, uchar16 mask);\n" |
| 40856 | "char16 __ovld __cnfn shuffle2(char16 x, char16 y, uchar16 mask);\n" |
| 40857 | "\n" |
| 40858 | "uchar16 __ovld __cnfn shuffle2(uchar2 x, uchar2 y, uchar16 mask);\n" |
| 40859 | "uchar16 __ovld __cnfn shuffle2(uchar4 x, uchar4 y, uchar16 mask);\n" |
| 40860 | "uchar16 __ovld __cnfn shuffle2(uchar8 x, uchar8 y, uchar16 mask);\n" |
| 40861 | "uchar16 __ovld __cnfn shuffle2(uchar16 x, uchar16 y, uchar16 mask);\n" |
| 40862 | "\n" |
| 40863 | "short16 __ovld __cnfn shuffle2(short2 x, short2 y, ushort16 mask);\n" |
| 40864 | "short16 __ovld __cnfn shuffle2(short4 x, short4 y, ushort16 mask);\n" |
| 40865 | "short16 __ovld __cnfn shuffle2(short8 x, short8 y, ushort16 mask);\n" |
| 40866 | "short16 __ovld __cnfn shuffle2(short16 x, short16 y, ushort16 mask);\n" |
| 40867 | "\n" |
| 40868 | "ushort16 __ovld __cnfn shuffle2(ushort2 x, ushort2 y, ushort16 mask);\n" |
| 40869 | "ushort16 __ovld __cnfn shuffle2(ushort4 x, ushort4 y, ushort16 mask);\n" |
| 40870 | "ushort16 __ovld __cnfn shuffle2(ushort8 x, ushort8 y, ushort16 mask);\n" |
| 40871 | "ushort16 __ovld __cnfn shuffle2(ushort16 x, ushort16 y, ushort16 mask);\n" |
| 40872 | "\n" |
| 40873 | "int16 __ovld __cnfn shuffle2(int2 x, int2 y, uint16 mask);\n" |
| 40874 | "int16 __ovld __cnfn shuffle2(int4 x, int4 y, uint16 mask);\n" |
| 40875 | "int16 __ovld __cnfn shuffle2(int8 x, int8 y, uint16 mask);\n" |
| 40876 | "int16 __ovld __cnfn shuffle2(int16 x, int16 y, uint16 mask);\n" |
| 40877 | "\n" |
| 40878 | "uint16 __ovld __cnfn shuffle2(uint2 x, uint2 y, uint16 mask);\n" |
| 40879 | "uint16 __ovld __cnfn shuffle2(uint4 x, uint4 y, uint16 mask);\n" |
| 40880 | "uint16 __ovld __cnfn shuffle2(uint8 x, uint8 y, uint16 mask);\n" |
| 40881 | "uint16 __ovld __cnfn shuffle2(uint16 x, uint16 y, uint16 mask);\n" |
| 40882 | "\n" |
| 40883 | "long16 __ovld __cnfn shuffle2(long2 x, long2 y, ulong16 mask);\n" |
| 40884 | "long16 __ovld __cnfn shuffle2(long4 x, long4 y, ulong16 mask);\n" |
| 40885 | "long16 __ovld __cnfn shuffle2(long8 x, long8 y, ulong16 mask);\n" |
| 40886 | "long16 __ovld __cnfn shuffle2(long16 x, long16 y, ulong16 mask);\n" |
| 40887 | "\n" |
| 40888 | "ulong16 __ovld __cnfn shuffle2(ulong2 x, ulong2 y, ulong16 mask);\n" |
| 40889 | "ulong16 __ovld __cnfn shuffle2(ulong4 x, ulong4 y, ulong16 mask);\n" |
| 40890 | "ulong16 __ovld __cnfn shuffle2(ulong8 x, ulong8 y, ulong16 mask);\n" |
| 40891 | "ulong16 __ovld __cnfn shuffle2(ulong16 x, ulong16 y, ulong16 mask);\n" |
| 40892 | "\n" |
| 40893 | "float16 __ovld __cnfn shuffle2(float2 x, float2 y, uint16 mask);\n" |
| 40894 | "float16 __ovld __cnfn shuffle2(float4 x, float4 y, uint16 mask);\n" |
| 40895 | "float16 __ovld __cnfn shuffle2(float8 x, float8 y, uint16 mask);\n" |
| 40896 | "float16 __ovld __cnfn shuffle2(float16 x, float16 y, uint16 mask);\n" |
| 40897 | "\n" |
| 40898 | "#ifdef cl_khr_fp64\n" |
| 40899 | "double2 __ovld __cnfn shuffle2(double2 x, double2 y, ulong2 mask);\n" |
| 40900 | "double2 __ovld __cnfn shuffle2(double4 x, double4 y, ulong2 mask);\n" |
| 40901 | "double2 __ovld __cnfn shuffle2(double8 x, double8 y, ulong2 mask);\n" |
| 40902 | "double2 __ovld __cnfn shuffle2(double16 x, double16 y, ulong2 mask);\n" |
| 40903 | "\n" |
| 40904 | "double4 __ovld __cnfn shuffle2(double2 x, double2 y, ulong4 mask);\n" |
| 40905 | "double4 __ovld __cnfn shuffle2(double4 x, double4 y, ulong4 mask);\n" |
| 40906 | "double4 __ovld __cnfn shuffle2(double8 x, double8 y, ulong4 mask);\n" |
| 40907 | "double4 __ovld __cnfn shuffle2(double16 x, double16 y, ulong4 mask);\n" |
| 40908 | "\n" |
| 40909 | "double8 __ovld __cnfn shuffle2(double2 x, double2 y, ulong8 mask);\n" |
| 40910 | "double8 __ovld __cnfn shuffle2(double4 x, double4 y, ulong8 mask);\n" |
| 40911 | "double8 __ovld __cnfn shuffle2(double8 x, double8 y, ulong8 mask);\n" |
| 40912 | "double8 __ovld __cnfn shuffle2(double16 x, double16 y, ulong8 mask);\n" |
| 40913 | "\n" |
| 40914 | "double16 __ovld __cnfn shuffle2(double2 x, double2 y, ulong16 mask);\n" |
| 40915 | "double16 __ovld __cnfn shuffle2(double4 x, double4 y, ulong16 mask);\n" |
| 40916 | "double16 __ovld __cnfn shuffle2(double8 x, double8 y, ulong16 mask);\n" |
| 40917 | "double16 __ovld __cnfn shuffle2(double16 x, double16 y, ulong16 mask);\n" |
| 40918 | "#endif //cl_khr_fp64\n" |
| 40919 | "\n" |
| 40920 | "#ifdef cl_khr_fp16\n" |
| 40921 | "half2 __ovld __cnfn shuffle2(half2 x, half2 y, ushort2 mask);\n" |
| 40922 | "half2 __ovld __cnfn shuffle2(half4 x, half4 y, ushort2 mask);\n" |
| 40923 | "half2 __ovld __cnfn shuffle2(half8 x, half8 y, ushort2 mask);\n" |
| 40924 | "half2 __ovld __cnfn shuffle2(half16 x, half16 y, ushort2 mask);\n" |
| 40925 | "\n" |
| 40926 | "half4 __ovld __cnfn shuffle2(half2 x, half2 y, ushort4 mask);\n" |
| 40927 | "half4 __ovld __cnfn shuffle2(half4 x, half4 y, ushort4 mask);\n" |
| 40928 | "half4 __ovld __cnfn shuffle2(half8 x, half8 y, ushort4 mask);\n" |
| 40929 | "half4 __ovld __cnfn shuffle2(half16 x, half16 y, ushort4 mask);\n" |
| 40930 | "\n" |
| 40931 | "half8 __ovld __cnfn shuffle2(half2 x, half2 y, ushort8 mask);\n" |
| 40932 | "half8 __ovld __cnfn shuffle2(half4 x, half4 y, ushort8 mask);\n" |
| 40933 | "half8 __ovld __cnfn shuffle2(half8 x, half8 y, ushort8 mask);\n" |
| 40934 | "half8 __ovld __cnfn shuffle2(half16 x, half16 y, ushort8 mask);\n" |
| 40935 | "\n" |
| 40936 | "half16 __ovld __cnfn shuffle2(half2 x, half2 y, ushort16 mask);\n" |
| 40937 | "half16 __ovld __cnfn shuffle2(half4 x, half4 y, ushort16 mask);\n" |
| 40938 | "half16 __ovld __cnfn shuffle2(half8 x, half8 y, ushort16 mask);\n" |
| 40939 | "half16 __ovld __cnfn shuffle2(half16 x, half16 y, ushort16 mask);\n" |
| 40940 | "#endif //cl_khr_fp16\n" |
| 40941 | "\n" |
| 40942 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2\n" |
| 40943 | "// OpenCL v1.2 s6.12.13, v2.0 s6.13.13 - printf\n" |
| 40944 | "\n" |
| 40945 | "int printf(__constant const char* st, ...);\n" |
| 40946 | "#endif\n" |
| 40947 | "\n" |
| 40948 | "// OpenCL v1.1 s6.11.3, v1.2 s6.12.14, v2.0 s6.13.14 - Image Read and Write Functions\n" |
| 40949 | "\n" |
| 40950 | "// These values need to match the runtime equivalent\n" |
| 40951 | "//\n" |
| 40952 | "// Addressing Mode.\n" |
| 40953 | "//\n" |
| 40954 | "#define CLK_ADDRESS_NONE 0\n" |
| 40955 | "#define CLK_ADDRESS_CLAMP_TO_EDGE 2\n" |
| 40956 | "#define CLK_ADDRESS_CLAMP 4\n" |
| 40957 | "#define CLK_ADDRESS_REPEAT 6\n" |
| 40958 | "#define CLK_ADDRESS_MIRRORED_REPEAT 8\n" |
| 40959 | "\n" |
| 40960 | "//\n" |
| 40961 | "// Coordination Normalization\n" |
| 40962 | "//\n" |
| 40963 | "#define CLK_NORMALIZED_COORDS_FALSE 0\n" |
| 40964 | "#define CLK_NORMALIZED_COORDS_TRUE 1\n" |
| 40965 | "\n" |
| 40966 | "//\n" |
| 40967 | "// Filtering Mode.\n" |
| 40968 | "//\n" |
| 40969 | "#define CLK_FILTER_NEAREST 0x10\n" |
| 40970 | "#define CLK_FILTER_LINEAR 0x20\n" |
| 40971 | "\n" |
| 40972 | "#ifdef cl_khr_gl_msaa_sharing\n" |
| 40973 | "#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable\n" |
| 40974 | "#endif //cl_khr_gl_msaa_sharing\n" |
| 40975 | "\n" |
| 40976 | "/**\n" |
| 40977 | " * Use the coordinate (coord.xy) to do an element lookup in\n" |
| 40978 | " * the 2D image object specified by image.\n" |
| 40979 | " *\n" |
| 40980 | " * Use the coordinate (coord.x, coord.y, coord.z) to do\n" |
| 40981 | " * an element lookup in the 3D image object specified\n" |
| 40982 | " * by image. coord.w is ignored.\n" |
| 40983 | " *\n" |
| 40984 | " * Use the coordinate (coord.z) to index into the\n" |
| 40985 | " * 2D image array object specified by image_array\n" |
| 40986 | " * and (coord.x, coord.y) to do an element lookup in\n" |
| 40987 | " * the 2D image object specified by image.\n" |
| 40988 | " *\n" |
| 40989 | " * Use the coordinate (x) to do an element lookup in\n" |
| 40990 | " * the 1D image object specified by image.\n" |
| 40991 | " *\n" |
| 40992 | " * Use the coordinate (coord.y) to index into the\n" |
| 40993 | " * 1D image array object specified by image_array\n" |
| 40994 | " * and (coord.x) to do an element lookup in\n" |
| 40995 | " * the 1D image object specified by image.\n" |
| 40996 | " *\n" |
| 40997 | " * Use the coordinate (cood.xy) and sample to do an\n" |
| 40998 | " * element lookup in the 2D multi-sample image specified\n" |
| 40999 | " * by image.\n" |
| 41000 | " *\n" |
| 41001 | " * Use coord.xy and sample to do an element\n" |
| 41002 | " * lookup in the 2D multi-sample image layer\n" |
| 41003 | " * identified by index coord.z in the 2D multi-sample\n" |
| 41004 | " * image array specified by image.\n" |
| 41005 | " *\n" |
| 41006 | " * For mipmap images, use the mip-level specified by\n" |
| 41007 | " * the Level-of-Detail (lod) or use gradients for LOD\n" |
| 41008 | " * computation.\n" |
| 41009 | " *\n" |
| 41010 | " * read_imagef returns floating-point values in the\n" |
| 41011 | " * range [0.0 ... 1.0] for image objects created with\n" |
| 41012 | " * image_channel_data_type set to one of the predefined\n" |
| 41013 | " * packed formats or CL_UNORM_INT8, or\n" |
| 41014 | " * CL_UNORM_INT16.\n" |
| 41015 | " *\n" |
| 41016 | " * read_imagef returns floating-point values in the\n" |
| 41017 | " * range [-1.0 ... 1.0] for image objects created with\n" |
| 41018 | " * image_channel_data_type set to CL_SNORM_INT8,\n" |
| 41019 | " * or CL_SNORM_INT16.\n" |
| 41020 | " *\n" |
| 41021 | " * read_imagef returns floating-point values for image\n" |
| 41022 | " * objects created with image_channel_data_type set to\n" |
| 41023 | " * CL_HALF_FLOAT or CL_FLOAT.\n" |
| 41024 | " *\n" |
| 41025 | " * read_imagei and read_imageui return\n" |
| 41026 | " * unnormalized signed integer and unsigned integer\n" |
| 41027 | " * values respectively. Each channel will be stored in a\n" |
| 41028 | " * 32-bit integer.\n" |
| 41029 | " *\n" |
| 41030 | " * read_imagei can only be used with image objects\n" |
| 41031 | " * created with image_channel_data_type set to one of\n" |
| 41032 | " * the following values:\n" |
| 41033 | " * CL_SIGNED_INT8,\n" |
| 41034 | " * CL_SIGNED_INT16 and\n" |
| 41035 | " * CL_SIGNED_INT32.\n" |
| 41036 | " * If the image_channel_data_type is not one of the\n" |
| 41037 | " * above values, the values returned by read_imagei\n" |
| 41038 | " * are undefined.\n" |
| 41039 | " *\n" |
| 41040 | " * read_imageui can only be used with image objects\n" |
| 41041 | " * created with image_channel_data_type set to one of\n" |
| 41042 | " * the following values:\n" |
| 41043 | " * CL_UNSIGNED_INT8,\n" |
| 41044 | " * CL_UNSIGNED_INT16 and\n" |
| 41045 | " * CL_UNSIGNED_INT32.\n" |
| 41046 | " * If the image_channel_data_type is not one of the\n" |
| 41047 | " * above values, the values returned by read_imageui\n" |
| 41048 | " * are undefined.\n" |
| 41049 | " *\n" |
| 41050 | " * The read_image{i|ui} calls support a nearest filter\n" |
| 41051 | " * only. The filter_mode specified in sampler\n" |
| 41052 | " * must be set to CLK_FILTER_NEAREST; otherwise\n" |
| 41053 | " * the values returned are undefined.\n" |
| 41054 | "\n" |
| 41055 | " * The read_image{f|i|ui} calls that take\n" |
| 41056 | " * integer coordinates must use a sampler with\n" |
| 41057 | " * normalized coordinates set to\n" |
| 41058 | " * CLK_NORMALIZED_COORDS_FALSE and\n" |
| 41059 | " * addressing mode set to\n" |
| 41060 | " * CLK_ADDRESS_CLAMP_TO_EDGE,\n" |
| 41061 | " * CLK_ADDRESS_CLAMP or CLK_ADDRESS_NONE;\n" |
| 41062 | " * otherwise the values returned are undefined.\n" |
| 41063 | " *\n" |
| 41064 | " * Values returned by read_imagef for image objects\n" |
| 41065 | " * with image_channel_data_type values not specified\n" |
| 41066 | " * in the description above are undefined.\n" |
| 41067 | " */\n" |
| 41068 | "\n" |
| 41069 | "float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, int2 coord);\n" |
| 41070 | "float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, float2 coord);\n" |
| 41071 | "\n" |
| 41072 | "int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, int2 coord);\n" |
| 41073 | "int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, float2 coord);\n" |
| 41074 | "uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, int2 coord);\n" |
| 41075 | "uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, float2 coord);\n" |
| 41076 | "\n" |
| 41077 | "float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, int4 coord);\n" |
| 41078 | "float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, float4 coord);\n" |
| 41079 | "\n" |
| 41080 | "int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, int4 coord);\n" |
| 41081 | "int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, float4 coord);\n" |
| 41082 | "uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, int4 coord);\n" |
| 41083 | "uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord);\n" |
| 41084 | "\n" |
| 41085 | "float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, int4 coord);\n" |
| 41086 | "float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, float4 coord);\n" |
| 41087 | "\n" |
| 41088 | "int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, int4 coord);\n" |
| 41089 | "int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, float4 coord);\n" |
| 41090 | "uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, int4 coord);\n" |
| 41091 | "uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, float4 coord);\n" |
| 41092 | "\n" |
| 41093 | "float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, int coord);\n" |
| 41094 | "float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord);\n" |
| 41095 | "\n" |
| 41096 | "int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, int coord);\n" |
| 41097 | "int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, float coord);\n" |
| 41098 | "uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, int coord);\n" |
| 41099 | "uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, float coord);\n" |
| 41100 | "\n" |
| 41101 | "float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, int2 coord);\n" |
| 41102 | "float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, float2 coord);\n" |
| 41103 | "\n" |
| 41104 | "int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, int2 coord);\n" |
| 41105 | "int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, float2 coord);\n" |
| 41106 | "uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, int2 coord);\n" |
| 41107 | "uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, float2 coord);\n" |
| 41108 | "\n" |
| 41109 | "#ifdef cl_khr_depth_images\n" |
| 41110 | "float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, float2 coord);\n" |
| 41111 | "float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, int2 coord);\n" |
| 41112 | "\n" |
| 41113 | "float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, float4 coord);\n" |
| 41114 | "float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, int4 coord);\n" |
| 41115 | "#endif //cl_khr_depth_images\n" |
| 41116 | "\n" |
| 41117 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
| 41118 | "float4 __purefn __ovld read_imagef(read_only image2d_msaa_t image, int2 coord, int sample);\n" |
| 41119 | "int4 __purefn __ovld read_imagei(read_only image2d_msaa_t image, int2 coord, int sample);\n" |
| 41120 | "uint4 __purefn __ovld read_imageui(read_only image2d_msaa_t image, int2 coord, int sample);\n" |
| 41121 | "\n" |
| 41122 | "float __purefn __ovld read_imagef(read_only image2d_msaa_depth_t image, int2 coord, int sample);\n" |
| 41123 | "\n" |
| 41124 | "float4 __purefn __ovld read_imagef(read_only image2d_array_msaa_t image, int4 coord, int sample);\n" |
| 41125 | "int4 __purefn __ovld read_imagei(read_only image2d_array_msaa_t image, int4 coord, int sample);\n" |
| 41126 | "uint4 __purefn __ovld read_imageui(read_only image2d_array_msaa_t image, int4 coord, int sample);\n" |
| 41127 | "\n" |
| 41128 | "float __purefn __ovld read_imagef(read_only image2d_array_msaa_depth_t image, int4 coord, int sample);\n" |
| 41129 | "#endif //cl_khr_gl_msaa_sharing\n" |
| 41130 | "\n" |
| 41131 | "// OpenCL Extension v2.0 s9.18 - Mipmaps\n" |
| 41132 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 41133 | "#ifdef cl_khr_mipmap_image\n" |
| 41134 | "\n" |
| 41135 | "float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord, float lod);\n" |
| 41136 | "int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, float coord, float lod);\n" |
| 41137 | "uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, float coord, float lod);\n" |
| 41138 | "\n" |
| 41139 | "float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n" |
| 41140 | "int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n" |
| 41141 | "uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n" |
| 41142 | "\n" |
| 41143 | "float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);\n" |
| 41144 | "int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);\n" |
| 41145 | "uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);\n" |
| 41146 | "\n" |
| 41147 | "float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, float2 coord, float lod);\n" |
| 41148 | "\n" |
| 41149 | "float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n" |
| 41150 | "int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n" |
| 41151 | "uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n" |
| 41152 | "\n" |
| 41153 | "float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, float4 coord, float lod);\n" |
| 41154 | "\n" |
| 41155 | "float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);\n" |
| 41156 | "int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);\n" |
| 41157 | "uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);\n" |
| 41158 | "\n" |
| 41159 | "float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);\n" |
| 41160 | "int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);\n" |
| 41161 | "uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);\n" |
| 41162 | "\n" |
| 41163 | "float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);\n" |
| 41164 | "int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);\n" |
| 41165 | "uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);\n" |
| 41166 | "\n" |
| 41167 | "float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n" |
| 41168 | "int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n" |
| 41169 | "uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n" |
| 41170 | "\n" |
| 41171 | "float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n" |
| 41172 | "\n" |
| 41173 | "float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n" |
| 41174 | "int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n" |
| 41175 | "uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n" |
| 41176 | "\n" |
| 41177 | "float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n" |
| 41178 | "\n" |
| 41179 | "float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);\n" |
| 41180 | "int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);\n" |
| 41181 | "uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);\n" |
| 41182 | "\n" |
| 41183 | "float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord, float lod);\n" |
| 41184 | "int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, float coord, float lod);\n" |
| 41185 | "uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, float coord, float lod);\n" |
| 41186 | "\n" |
| 41187 | "float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n" |
| 41188 | "int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n" |
| 41189 | "uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n" |
| 41190 | "\n" |
| 41191 | "float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);\n" |
| 41192 | "int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);\n" |
| 41193 | "uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, float2 coord, float lod);\n" |
| 41194 | "\n" |
| 41195 | "float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, float2 coord, float lod);\n" |
| 41196 | "\n" |
| 41197 | "float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n" |
| 41198 | "int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n" |
| 41199 | "uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n" |
| 41200 | "\n" |
| 41201 | "float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, float4 coord, float lod);\n" |
| 41202 | "\n" |
| 41203 | "float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);\n" |
| 41204 | "int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);\n" |
| 41205 | "uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord, float lod);\n" |
| 41206 | "\n" |
| 41207 | "#endif //cl_khr_mipmap_image\n" |
| 41208 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 41209 | "\n" |
| 41210 | "/**\n" |
| 41211 | "* Sampler-less Image Access\n" |
| 41212 | "*/\n" |
| 41213 | "\n" |
| 41214 | "float4 __purefn __ovld read_imagef(read_only image1d_t image, int coord);\n" |
| 41215 | "int4 __purefn __ovld read_imagei(read_only image1d_t image, int coord);\n" |
| 41216 | "uint4 __purefn __ovld read_imageui(read_only image1d_t image, int coord);\n" |
| 41217 | "\n" |
| 41218 | "float4 __purefn __ovld read_imagef(read_only image1d_buffer_t image, int coord);\n" |
| 41219 | "int4 __purefn __ovld read_imagei(read_only image1d_buffer_t image, int coord);\n" |
| 41220 | "uint4 __purefn __ovld read_imageui(read_only image1d_buffer_t image, int coord);\n" |
| 41221 | "\n" |
| 41222 | "float4 __purefn __ovld read_imagef(read_only image1d_array_t image, int2 coord);\n" |
| 41223 | "int4 __purefn __ovld read_imagei(read_only image1d_array_t image, int2 coord);\n" |
| 41224 | "uint4 __purefn __ovld read_imageui(read_only image1d_array_t image, int2 coord);\n" |
| 41225 | "\n" |
| 41226 | "float4 __purefn __ovld read_imagef(read_only image2d_t image, int2 coord);\n" |
| 41227 | "int4 __purefn __ovld read_imagei(read_only image2d_t image, int2 coord);\n" |
| 41228 | "uint4 __purefn __ovld read_imageui(read_only image2d_t image, int2 coord);\n" |
| 41229 | "\n" |
| 41230 | "float4 __purefn __ovld read_imagef(read_only image2d_array_t image, int4 coord);\n" |
| 41231 | "int4 __purefn __ovld read_imagei(read_only image2d_array_t image, int4 coord);\n" |
| 41232 | "uint4 __purefn __ovld read_imageui(read_only image2d_array_t image, int4 coord);\n" |
| 41233 | "\n" |
| 41234 | "#ifdef cl_khr_depth_images\n" |
| 41235 | "float __purefn __ovld read_imagef(read_only image2d_depth_t image, int2 coord);\n" |
| 41236 | "float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, int4 coord);\n" |
| 41237 | "#endif //cl_khr_depth_images\n" |
| 41238 | "\n" |
| 41239 | "float4 __purefn __ovld read_imagef(read_only image3d_t image, int4 coord);\n" |
| 41240 | "int4 __purefn __ovld read_imagei(read_only image3d_t image, int4 coord);\n" |
| 41241 | "uint4 __purefn __ovld read_imageui(read_only image3d_t image, int4 coord);\n" |
| 41242 | "\n" |
| 41243 | "// Image read functions returning half4 type\n" |
| 41244 | "#ifdef cl_khr_fp16\n" |
| 41245 | "half4 __purefn __ovld read_imageh(read_only image1d_t image, sampler_t sampler, int coord);\n" |
| 41246 | "half4 __purefn __ovld read_imageh(read_only image1d_t image, sampler_t sampler, float coord);\n" |
| 41247 | "half4 __purefn __ovld read_imageh(read_only image1d_array_t image, sampler_t sampler, int2 coord);\n" |
| 41248 | "half4 __purefn __ovld read_imageh(read_only image1d_array_t image, sampler_t sampler, float2 coord);\n" |
| 41249 | "half4 __purefn __ovld read_imageh(read_only image2d_t image, sampler_t sampler, int2 coord);\n" |
| 41250 | "half4 __purefn __ovld read_imageh(read_only image2d_t image, sampler_t sampler, float2 coord);\n" |
| 41251 | "half4 __purefn __ovld read_imageh(read_only image3d_t image, sampler_t sampler, int4 coord);\n" |
| 41252 | "half4 __purefn __ovld read_imageh(read_only image3d_t image, sampler_t sampler, float4 coord);\n" |
| 41253 | "half4 __purefn __ovld read_imageh(read_only image2d_array_t image, sampler_t sampler, int4 coord);\n" |
| 41254 | "half4 __purefn __ovld read_imageh(read_only image2d_array_t image, sampler_t sampler, float4 coord);\n" |
| 41255 | "half4 __purefn __ovld read_imageh(read_only image1d_t image, int coord);\n" |
| 41256 | "half4 __purefn __ovld read_imageh(read_only image2d_t image, int2 coord);\n" |
| 41257 | "half4 __purefn __ovld read_imageh(read_only image3d_t image, int4 coord);\n" |
| 41258 | "half4 __purefn __ovld read_imageh(read_only image1d_array_t image, int2 coord);\n" |
| 41259 | "half4 __purefn __ovld read_imageh(read_only image2d_array_t image, int4 coord);\n" |
| 41260 | "half4 __purefn __ovld read_imageh(read_only image1d_buffer_t image, int coord);\n" |
| 41261 | "#endif //cl_khr_fp16\n" |
| 41262 | "\n" |
| 41263 | "// Image read functions for read_write images\n" |
| 41264 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 41265 | "float4 __purefn __ovld read_imagef(read_write image1d_t image, int coord);\n" |
| 41266 | "int4 __purefn __ovld read_imagei(read_write image1d_t image, int coord);\n" |
| 41267 | "uint4 __purefn __ovld read_imageui(read_write image1d_t image, int coord);\n" |
| 41268 | "\n" |
| 41269 | "float4 __purefn __ovld read_imagef(read_write image1d_buffer_t image, int coord);\n" |
| 41270 | "int4 __purefn __ovld read_imagei(read_write image1d_buffer_t image, int coord);\n" |
| 41271 | "uint4 __purefn __ovld read_imageui(read_write image1d_buffer_t image, int coord);\n" |
| 41272 | "\n" |
| 41273 | "float4 __purefn __ovld read_imagef(read_write image1d_array_t image, int2 coord);\n" |
| 41274 | "int4 __purefn __ovld read_imagei(read_write image1d_array_t image, int2 coord);\n" |
| 41275 | "uint4 __purefn __ovld read_imageui(read_write image1d_array_t image, int2 coord);\n" |
| 41276 | "\n" |
| 41277 | "float4 __purefn __ovld read_imagef(read_write image2d_t image, int2 coord);\n" |
| 41278 | "int4 __purefn __ovld read_imagei(read_write image2d_t image, int2 coord);\n" |
| 41279 | "uint4 __purefn __ovld read_imageui(read_write image2d_t image, int2 coord);\n" |
| 41280 | "\n" |
| 41281 | "float4 __purefn __ovld read_imagef(read_write image2d_array_t image, int4 coord);\n" |
| 41282 | "int4 __purefn __ovld read_imagei(read_write image2d_array_t image, int4 coord);\n" |
| 41283 | "uint4 __purefn __ovld read_imageui(read_write image2d_array_t image, int4 coord);\n" |
| 41284 | "\n" |
| 41285 | "float4 __purefn __ovld read_imagef(read_write image3d_t image, int4 coord);\n" |
| 41286 | "int4 __purefn __ovld read_imagei(read_write image3d_t image, int4 coord);\n" |
| 41287 | "uint4 __purefn __ovld read_imageui(read_write image3d_t image, int4 coord);\n" |
| 41288 | "\n" |
| 41289 | "#ifdef cl_khr_depth_images\n" |
| 41290 | "float __purefn __ovld read_imagef(read_write image2d_depth_t image, int2 coord);\n" |
| 41291 | "float __purefn __ovld read_imagef(read_write image2d_array_depth_t image, int4 coord);\n" |
| 41292 | "#endif //cl_khr_depth_images\n" |
| 41293 | "\n" |
| 41294 | "#if cl_khr_gl_msaa_sharing\n" |
| 41295 | "float4 __purefn __ovld read_imagef(read_write image2d_msaa_t image, int2 coord, int sample);\n" |
| 41296 | "int4 __purefn __ovld read_imagei(read_write image2d_msaa_t image, int2 coord, int sample);\n" |
| 41297 | "uint4 __purefn __ovld read_imageui(read_write image2d_msaa_t image, int2 coord, int sample);\n" |
| 41298 | "\n" |
| 41299 | "float4 __purefn __ovld read_imagef(read_write image2d_array_msaa_t image, int4 coord, int sample);\n" |
| 41300 | "int4 __purefn __ovld read_imagei(read_write image2d_array_msaa_t image, int4 coord, int sample);\n" |
| 41301 | "uint4 __purefn __ovld read_imageui(read_write image2d_array_msaa_t image, int4 coord, int sample);\n" |
| 41302 | "\n" |
| 41303 | "float __purefn __ovld read_imagef(read_write image2d_msaa_depth_t image, int2 coord, int sample);\n" |
| 41304 | "float __purefn __ovld read_imagef(read_write image2d_array_msaa_depth_t image, int4 coord, int sample);\n" |
| 41305 | "#endif //cl_khr_gl_msaa_sharing\n" |
| 41306 | "\n" |
| 41307 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 41308 | "#ifdef cl_khr_mipmap_image\n" |
| 41309 | "float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float lod);\n" |
| 41310 | "int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float lod);\n" |
| 41311 | "uint4 __purefn __ovld read_imageui(read_write image1d_t image, sampler_t sampler, float coord, float lod);\n" |
| 41312 | "\n" |
| 41313 | "float4 __purefn __ovld read_imagef(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n" |
| 41314 | "int4 __purefn __ovld read_imagei(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n" |
| 41315 | "uint4 __purefn __ovld read_imageui(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n" |
| 41316 | "\n" |
| 41317 | "float4 __purefn __ovld read_imagef(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);\n" |
| 41318 | "int4 __purefn __ovld read_imagei(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);\n" |
| 41319 | "uint4 __purefn __ovld read_imageui(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);\n" |
| 41320 | "\n" |
| 41321 | "float __purefn __ovld read_imagef(read_write image2d_depth_t image, sampler_t sampler, float2 coord, float lod);\n" |
| 41322 | "\n" |
| 41323 | "float4 __purefn __ovld read_imagef(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n" |
| 41324 | "int4 __purefn __ovld read_imagei(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n" |
| 41325 | "uint4 __purefn __ovld read_imageui(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n" |
| 41326 | "\n" |
| 41327 | "float __purefn __ovld read_imagef(read_write image2d_array_depth_t image, sampler_t sampler, float4 coord, float lod);\n" |
| 41328 | "\n" |
| 41329 | "float4 __purefn __ovld read_imagef(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);\n" |
| 41330 | "int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);\n" |
| 41331 | "uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);\n" |
| 41332 | "\n" |
| 41333 | "float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);\n" |
| 41334 | "int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);\n" |
| 41335 | "uint4 __purefn __ovld read_imageui(read_write image1d_t image, sampler_t sampler, float coord, float gradientX, float gradientY);\n" |
| 41336 | "\n" |
| 41337 | "float4 __purefn __ovld read_imagef(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);\n" |
| 41338 | "int4 __purefn __ovld read_imagei(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);\n" |
| 41339 | "uint4 __purefn __ovld read_imageui(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float gradientX, float gradientY);\n" |
| 41340 | "\n" |
| 41341 | "float4 __purefn __ovld read_imagef(read_write image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n" |
| 41342 | "int4 __purefn __ovld read_imagei(read_write image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n" |
| 41343 | "uint4 __purefn __ovld read_imageui(read_write image2d_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n" |
| 41344 | "\n" |
| 41345 | "float __purefn __ovld read_imagef(read_write image2d_depth_t image, sampler_t sampler, float2 coord, float2 gradientX, float2 gradientY);\n" |
| 41346 | "\n" |
| 41347 | "float4 __purefn __ovld read_imagef(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n" |
| 41348 | "int4 __purefn __ovld read_imagei(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n" |
| 41349 | "uint4 __purefn __ovld read_imageui(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n" |
| 41350 | "\n" |
| 41351 | "float __purefn __ovld read_imagef(read_write image2d_array_depth_t image, sampler_t sampler, float4 coord, float2 gradientX, float2 gradientY);\n" |
| 41352 | "\n" |
| 41353 | "float4 __purefn __ovld read_imagef(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);\n" |
| 41354 | "int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);\n" |
| 41355 | "uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);\n" |
| 41356 | "\n" |
| 41357 | "float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float lod);\n" |
| 41358 | "int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float lod);\n" |
| 41359 | "uint4 __purefn __ovld read_imageui(read_write image1d_t image, sampler_t sampler, float coord, float lod);\n" |
| 41360 | "\n" |
| 41361 | "float4 __purefn __ovld read_imagef(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n" |
| 41362 | "int4 __purefn __ovld read_imagei(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n" |
| 41363 | "uint4 __purefn __ovld read_imageui(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod);\n" |
| 41364 | "\n" |
| 41365 | "float4 __purefn __ovld read_imagef(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);\n" |
| 41366 | "int4 __purefn __ovld read_imagei(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);\n" |
| 41367 | "uint4 __purefn __ovld read_imageui(read_write image2d_t image, sampler_t sampler, float2 coord, float lod);\n" |
| 41368 | "\n" |
| 41369 | "float __purefn __ovld read_imagef(read_write image2d_depth_t image, sampler_t sampler, float2 coord, float lod);\n" |
| 41370 | "\n" |
| 41371 | "float4 __purefn __ovld read_imagef(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n" |
| 41372 | "int4 __purefn __ovld read_imagei(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n" |
| 41373 | "uint4 __purefn __ovld read_imageui(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod);\n" |
| 41374 | "\n" |
| 41375 | "float __purefn __ovld read_imagef(read_write image2d_array_depth_t image, sampler_t sampler, float4 coord, float lod);\n" |
| 41376 | "\n" |
| 41377 | "float4 __purefn __ovld read_imagef(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);\n" |
| 41378 | "int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);\n" |
| 41379 | "uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float lod);\n" |
| 41380 | "#endif //cl_khr_mipmap_image\n" |
| 41381 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 41382 | "\n" |
| 41383 | "// Image read functions returning half4 type\n" |
| 41384 | "#ifdef cl_khr_fp16\n" |
| 41385 | "half4 __purefn __ovld read_imageh(read_write image1d_t image, int coord);\n" |
| 41386 | "half4 __purefn __ovld read_imageh(read_write image2d_t image, int2 coord);\n" |
| 41387 | "half4 __purefn __ovld read_imageh(read_write image3d_t image, int4 coord);\n" |
| 41388 | "half4 __purefn __ovld read_imageh(read_write image1d_array_t image, int2 coord);\n" |
| 41389 | "half4 __purefn __ovld read_imageh(read_write image2d_array_t image, int4 coord);\n" |
| 41390 | "half4 __purefn __ovld read_imageh(read_write image1d_buffer_t image, int coord);\n" |
| 41391 | "#endif //cl_khr_fp16\n" |
| 41392 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 41393 | "\n" |
| 41394 | "/**\n" |
| 41395 | " * Write color value to location specified by coordinate\n" |
| 41396 | " * (coord.x, coord.y) in the 2D image object specified by image.\n" |
| 41397 | " * (coord.x, coord.y) are considered to be unnormalized coordinates\n" |
| 41398 | " * and must be in the range 0 ... image width - 1, and 0\n" |
| 41399 | " * ... image height - 1.\n" |
| 41400 | "\n" |
| 41401 | " * Write color value to location specified by coordinate\n" |
| 41402 | " * (coord.x, coord.y) in the 2D image object specified by index\n" |
| 41403 | " * (coord.z) of the 2D image array object image_array.\n" |
| 41404 | " * (coord.x, coord.y) are considered to be unnormalized\n" |
| 41405 | " * coordinates and must be in the range 0 ... image width\n" |
| 41406 | " * - 1.\n" |
| 41407 | " *\n" |
| 41408 | " * Write color value to location specified by coordinate\n" |
| 41409 | " * (coord) in the 1D image (buffer) object specified by image.\n" |
| 41410 | " * coord is considered to be unnormalized coordinates\n" |
| 41411 | " * and must be in the range 0 ... image width - 1.\n" |
| 41412 | " *\n" |
| 41413 | " * Write color value to location specified by coordinate\n" |
| 41414 | " * (coord.x) in the 1D image object specified by index\n" |
| 41415 | " * (coord.y) of the 1D image array object image_array.\n" |
| 41416 | " * x is considered to be unnormalized coordinates\n" |
| 41417 | " * and must be in the range 0 ... image width - 1.\n" |
| 41418 | " *\n" |
| 41419 | " * Write color value to location specified by coordinate\n" |
| 41420 | " * (coord.x, coord.y, coord.z) in the 3D image object specified by image.\n" |
| 41421 | " * coord.x & coord.y are considered to be unnormalized coordinates\n" |
| 41422 | " * and must be in the range 0 ... image width - 1, and 0\n" |
| 41423 | " * ... image height - 1.\n" |
| 41424 | " *\n" |
| 41425 | " * For mipmap images, use mip-level specified by lod.\n" |
| 41426 | " *\n" |
| 41427 | " * Appropriate data format conversion to the specified\n" |
| 41428 | " * image format is done before writing the color value.\n" |
| 41429 | " *\n" |
| 41430 | " * write_imagef can only be used with image objects\n" |
| 41431 | " * created with image_channel_data_type set to one of\n" |
| 41432 | " * the pre-defined packed formats or set to\n" |
| 41433 | " * CL_SNORM_INT8, CL_UNORM_INT8,\n" |
| 41434 | " * CL_SNORM_INT16, CL_UNORM_INT16,\n" |
| 41435 | " * CL_HALF_FLOAT or CL_FLOAT. Appropriate data\n" |
| 41436 | " * format conversion will be done to convert channel\n" |
| 41437 | " * data from a floating-point value to actual data format\n" |
| 41438 | " * in which the channels are stored.\n" |
| 41439 | " *\n" |
| 41440 | " * write_imagei can only be used with image objects\n" |
| 41441 | " * created with image_channel_data_type set to one of\n" |
| 41442 | " * the following values:\n" |
| 41443 | " * CL_SIGNED_INT8,\n" |
| 41444 | " * CL_SIGNED_INT16 and\n" |
| 41445 | " * CL_SIGNED_INT32.\n" |
| 41446 | " *\n" |
| 41447 | " * write_imageui can only be used with image objects\n" |
| 41448 | " * created with image_channel_data_type set to one of\n" |
| 41449 | " * the following values:\n" |
| 41450 | " * CL_UNSIGNED_INT8,\n" |
| 41451 | " * CL_UNSIGNED_INT16 and\n" |
| 41452 | " * CL_UNSIGNED_INT32.\n" |
| 41453 | " *\n" |
| 41454 | " * The behavior of write_imagef, write_imagei and\n" |
| 41455 | " * write_imageui for image objects created with\n" |
| 41456 | " * image_channel_data_type values not specified in\n" |
| 41457 | " * the description above or with (x, y) coordinate\n" |
| 41458 | " * values that are not in the range (0 ... image width -1,\n" |
| 41459 | " * 0 ... image height - 1), respectively, is undefined.\n" |
| 41460 | " */\n" |
| 41461 | "void __ovld write_imagef(write_only image2d_t image, int2 coord, float4 color);\n" |
| 41462 | "void __ovld write_imagei(write_only image2d_t image, int2 coord, int4 color);\n" |
| 41463 | "void __ovld write_imageui(write_only image2d_t image, int2 coord, uint4 color);\n" |
| 41464 | "\n" |
| 41465 | "void __ovld write_imagef(write_only image2d_array_t image_array, int4 coord, float4 color);\n" |
| 41466 | "void __ovld write_imagei(write_only image2d_array_t image_array, int4 coord, int4 color);\n" |
| 41467 | "void __ovld write_imageui(write_only image2d_array_t image_array, int4 coord, uint4 color);\n" |
| 41468 | "\n" |
| 41469 | "void __ovld write_imagef(write_only image1d_t image, int coord, float4 color);\n" |
| 41470 | "void __ovld write_imagei(write_only image1d_t image, int coord, int4 color);\n" |
| 41471 | "void __ovld write_imageui(write_only image1d_t image, int coord, uint4 color);\n" |
| 41472 | "\n" |
| 41473 | "void __ovld write_imagef(write_only image1d_buffer_t image, int coord, float4 color);\n" |
| 41474 | "void __ovld write_imagei(write_only image1d_buffer_t image, int coord, int4 color);\n" |
| 41475 | "void __ovld write_imageui(write_only image1d_buffer_t image, int coord, uint4 color);\n" |
| 41476 | "\n" |
| 41477 | "void __ovld write_imagef(write_only image1d_array_t image_array, int2 coord, float4 color);\n" |
| 41478 | "void __ovld write_imagei(write_only image1d_array_t image_array, int2 coord, int4 color);\n" |
| 41479 | "void __ovld write_imageui(write_only image1d_array_t image_array, int2 coord, uint4 color);\n" |
| 41480 | "\n" |
| 41481 | "#ifdef cl_khr_3d_image_writes\n" |
| 41482 | "void __ovld write_imagef(write_only image3d_t image, int4 coord, float4 color);\n" |
| 41483 | "void __ovld write_imagei(write_only image3d_t image, int4 coord, int4 color);\n" |
| 41484 | "void __ovld write_imageui(write_only image3d_t image, int4 coord, uint4 color);\n" |
| 41485 | "#endif\n" |
| 41486 | "\n" |
| 41487 | "#ifdef cl_khr_depth_images\n" |
| 41488 | "void __ovld write_imagef(write_only image2d_depth_t image, int2 coord, float color);\n" |
| 41489 | "void __ovld write_imagef(write_only image2d_array_depth_t image, int4 coord, float color);\n" |
| 41490 | "#endif //cl_khr_depth_images\n" |
| 41491 | "\n" |
| 41492 | "// OpenCL Extension v2.0 s9.18 - Mipmaps\n" |
| 41493 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 41494 | "#ifdef cl_khr_mipmap_image\n" |
| 41495 | "void __ovld write_imagef(write_only image1d_t image, int coord, int lod, float4 color);\n" |
| 41496 | "void __ovld write_imagei(write_only image1d_t image, int coord, int lod, int4 color);\n" |
| 41497 | "void __ovld write_imageui(write_only image1d_t image, int coord, int lod, uint4 color);\n" |
| 41498 | "\n" |
| 41499 | "void __ovld write_imagef(write_only image1d_array_t image_array, int2 coord, int lod, float4 color);\n" |
| 41500 | "void __ovld write_imagei(write_only image1d_array_t image_array, int2 coord, int lod, int4 color);\n" |
| 41501 | "void __ovld write_imageui(write_only image1d_array_t image_array, int2 coord, int lod, uint4 color);\n" |
| 41502 | "\n" |
| 41503 | "void __ovld write_imagef(write_only image2d_t image, int2 coord, int lod, float4 color);\n" |
| 41504 | "void __ovld write_imagei(write_only image2d_t image, int2 coord, int lod, int4 color);\n" |
| 41505 | "void __ovld write_imageui(write_only image2d_t image, int2 coord, int lod, uint4 color);\n" |
| 41506 | "\n" |
| 41507 | "void __ovld write_imagef(write_only image2d_array_t image_array, int4 coord, int lod, float4 color);\n" |
| 41508 | "void __ovld write_imagei(write_only image2d_array_t image_array, int4 coord, int lod, int4 color);\n" |
| 41509 | "void __ovld write_imageui(write_only image2d_array_t image_array, int4 coord, int lod, uint4 color);\n" |
| 41510 | "\n" |
| 41511 | "void __ovld write_imagef(write_only image2d_depth_t image, int2 coord, int lod, float color);\n" |
| 41512 | "void __ovld write_imagef(write_only image2d_array_depth_t image, int4 coord, int lod, float color);\n" |
| 41513 | "\n" |
| 41514 | "#ifdef cl_khr_3d_image_writes\n" |
| 41515 | "void __ovld write_imagef(write_only image3d_t image, int4 coord, int lod, float4 color);\n" |
| 41516 | "void __ovld write_imagei(write_only image3d_t image, int4 coord, int lod, int4 color);\n" |
| 41517 | "void __ovld write_imageui(write_only image3d_t image, int4 coord, int lod, uint4 color);\n" |
| 41518 | "#endif\n" |
| 41519 | "#endif //cl_khr_mipmap_image\n" |
| 41520 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 41521 | "\n" |
| 41522 | "// Image write functions for half4 type\n" |
| 41523 | "#ifdef cl_khr_fp16\n" |
| 41524 | "void __ovld write_imageh(write_only image1d_t image, int coord, half4 color);\n" |
| 41525 | "void __ovld write_imageh(write_only image2d_t image, int2 coord, half4 color);\n" |
| 41526 | "#ifdef cl_khr_3d_image_writes\n" |
| 41527 | "void __ovld write_imageh(write_only image3d_t image, int4 coord, half4 color);\n" |
| 41528 | "#endif\n" |
| 41529 | "void __ovld write_imageh(write_only image1d_array_t image, int2 coord, half4 color);\n" |
| 41530 | "void __ovld write_imageh(write_only image2d_array_t image, int4 coord, half4 color);\n" |
| 41531 | "void __ovld write_imageh(write_only image1d_buffer_t image, int coord, half4 color);\n" |
| 41532 | "#endif //cl_khr_fp16\n" |
| 41533 | "\n" |
| 41534 | "// Image write functions for read_write images\n" |
| 41535 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 41536 | "void __ovld write_imagef(read_write image2d_t image, int2 coord, float4 color);\n" |
| 41537 | "void __ovld write_imagei(read_write image2d_t image, int2 coord, int4 color);\n" |
| 41538 | "void __ovld write_imageui(read_write image2d_t image, int2 coord, uint4 color);\n" |
| 41539 | "\n" |
| 41540 | "void __ovld write_imagef(read_write image2d_array_t image_array, int4 coord, float4 color);\n" |
| 41541 | "void __ovld write_imagei(read_write image2d_array_t image_array, int4 coord, int4 color);\n" |
| 41542 | "void __ovld write_imageui(read_write image2d_array_t image_array, int4 coord, uint4 color);\n" |
| 41543 | "\n" |
| 41544 | "void __ovld write_imagef(read_write image1d_t image, int coord, float4 color);\n" |
| 41545 | "void __ovld write_imagei(read_write image1d_t image, int coord, int4 color);\n" |
| 41546 | "void __ovld write_imageui(read_write image1d_t image, int coord, uint4 color);\n" |
| 41547 | "\n" |
| 41548 | "void __ovld write_imagef(read_write image1d_buffer_t image, int coord, float4 color);\n" |
| 41549 | "void __ovld write_imagei(read_write image1d_buffer_t image, int coord, int4 color);\n" |
| 41550 | "void __ovld write_imageui(read_write image1d_buffer_t image, int coord, uint4 color);\n" |
| 41551 | "\n" |
| 41552 | "void __ovld write_imagef(read_write image1d_array_t image_array, int2 coord, float4 color);\n" |
| 41553 | "void __ovld write_imagei(read_write image1d_array_t image_array, int2 coord, int4 color);\n" |
| 41554 | "void __ovld write_imageui(read_write image1d_array_t image_array, int2 coord, uint4 color);\n" |
| 41555 | "\n" |
| 41556 | "#ifdef cl_khr_3d_image_writes\n" |
| 41557 | "void __ovld write_imagef(read_write image3d_t image, int4 coord, float4 color);\n" |
| 41558 | "void __ovld write_imagei(read_write image3d_t image, int4 coord, int4 color);\n" |
| 41559 | "void __ovld write_imageui(read_write image3d_t image, int4 coord, uint4 color);\n" |
| 41560 | "#endif\n" |
| 41561 | "\n" |
| 41562 | "#ifdef cl_khr_depth_images\n" |
| 41563 | "void __ovld write_imagef(read_write image2d_depth_t image, int2 coord, float color);\n" |
| 41564 | "void __ovld write_imagef(read_write image2d_array_depth_t image, int4 coord, float color);\n" |
| 41565 | "#endif //cl_khr_depth_images\n" |
| 41566 | "\n" |
| 41567 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 41568 | "#ifdef cl_khr_mipmap_image\n" |
| 41569 | "void __ovld write_imagef(read_write image1d_t image, int coord, int lod, float4 color);\n" |
| 41570 | "void __ovld write_imagei(read_write image1d_t image, int coord, int lod, int4 color);\n" |
| 41571 | "void __ovld write_imageui(read_write image1d_t image, int coord, int lod, uint4 color);\n" |
| 41572 | "\n" |
| 41573 | "void __ovld write_imagef(read_write image1d_array_t image_array, int2 coord, int lod, float4 color);\n" |
| 41574 | "void __ovld write_imagei(read_write image1d_array_t image_array, int2 coord, int lod, int4 color);\n" |
| 41575 | "void __ovld write_imageui(read_write image1d_array_t image_array, int2 coord, int lod, uint4 color);\n" |
| 41576 | "\n" |
| 41577 | "void __ovld write_imagef(read_write image2d_t image, int2 coord, int lod, float4 color);\n" |
| 41578 | "void __ovld write_imagei(read_write image2d_t image, int2 coord, int lod, int4 color);\n" |
| 41579 | "void __ovld write_imageui(read_write image2d_t image, int2 coord, int lod, uint4 color);\n" |
| 41580 | "\n" |
| 41581 | "void __ovld write_imagef(read_write image2d_array_t image_array, int4 coord, int lod, float4 color);\n" |
| 41582 | "void __ovld write_imagei(read_write image2d_array_t image_array, int4 coord, int lod, int4 color);\n" |
| 41583 | "void __ovld write_imageui(read_write image2d_array_t image_array, int4 coord, int lod, uint4 color);\n" |
| 41584 | "\n" |
| 41585 | "void __ovld write_imagef(read_write image2d_depth_t image, int2 coord, int lod, float color);\n" |
| 41586 | "void __ovld write_imagef(read_write image2d_array_depth_t image, int4 coord, int lod, float color);\n" |
| 41587 | "\n" |
| 41588 | "#ifdef cl_khr_3d_image_writes\n" |
| 41589 | "void __ovld write_imagef(read_write image3d_t image, int4 coord, int lod, float4 color);\n" |
| 41590 | "void __ovld write_imagei(read_write image3d_t image, int4 coord, int lod, int4 color);\n" |
| 41591 | "void __ovld write_imageui(read_write image3d_t image, int4 coord, int lod, uint4 color);\n" |
| 41592 | "#endif\n" |
| 41593 | "#endif //cl_khr_mipmap_image\n" |
| 41594 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 41595 | "\n" |
| 41596 | "// Image write functions for half4 type\n" |
| 41597 | "#ifdef cl_khr_fp16\n" |
| 41598 | "void __ovld write_imageh(read_write image1d_t image, int coord, half4 color);\n" |
| 41599 | "void __ovld write_imageh(read_write image2d_t image, int2 coord, half4 color);\n" |
| 41600 | "#ifdef cl_khr_3d_image_writes\n" |
| 41601 | "void __ovld write_imageh(read_write image3d_t image, int4 coord, half4 color);\n" |
| 41602 | "#endif\n" |
| 41603 | "void __ovld write_imageh(read_write image1d_array_t image, int2 coord, half4 color);\n" |
| 41604 | "void __ovld write_imageh(read_write image2d_array_t image, int4 coord, half4 color);\n" |
| 41605 | "void __ovld write_imageh(read_write image1d_buffer_t image, int coord, half4 color);\n" |
| 41606 | "#endif //cl_khr_fp16\n" |
| 41607 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 41608 | "\n" |
| 41609 | "// Note: In OpenCL v1.0/1.1/1.2, image argument of image query builtin functions does not have\n" |
| 41610 | "// access qualifier, which by default assume read_only access qualifier. Image query builtin\n" |
| 41611 | "// functions with write_only image argument should also be declared.\n" |
| 41612 | "\n" |
| 41613 | "/**\n" |
| 41614 | " * Return the image width in pixels.\n" |
| 41615 | " *\n" |
| 41616 | " */\n" |
| 41617 | "int __ovld __cnfn get_image_width(read_only image1d_t image);\n" |
| 41618 | "int __ovld __cnfn get_image_width(read_only image1d_buffer_t image);\n" |
| 41619 | "int __ovld __cnfn get_image_width(read_only image2d_t image);\n" |
| 41620 | "#ifdef cl_khr_3d_image_writes\n" |
| 41621 | "int __ovld __cnfn get_image_width(read_only image3d_t image);\n" |
| 41622 | "#endif\n" |
| 41623 | "int __ovld __cnfn get_image_width(read_only image1d_array_t image);\n" |
| 41624 | "int __ovld __cnfn get_image_width(read_only image2d_array_t image);\n" |
| 41625 | "#ifdef cl_khr_depth_images\n" |
| 41626 | "int __ovld __cnfn get_image_width(read_only image2d_depth_t image);\n" |
| 41627 | "int __ovld __cnfn get_image_width(read_only image2d_array_depth_t image);\n" |
| 41628 | "#endif //cl_khr_depth_images\n" |
| 41629 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
| 41630 | "int __ovld __cnfn get_image_width(read_only image2d_msaa_t image);\n" |
| 41631 | "int __ovld __cnfn get_image_width(read_only image2d_msaa_depth_t image);\n" |
| 41632 | "int __ovld __cnfn get_image_width(read_only image2d_array_msaa_t image);\n" |
| 41633 | "int __ovld __cnfn get_image_width(read_only image2d_array_msaa_depth_t image);\n" |
| 41634 | "#endif //cl_khr_gl_msaa_sharing\n" |
| 41635 | "\n" |
| 41636 | "int __ovld __cnfn get_image_width(write_only image1d_t image);\n" |
| 41637 | "int __ovld __cnfn get_image_width(write_only image1d_buffer_t image);\n" |
| 41638 | "int __ovld __cnfn get_image_width(write_only image2d_t image);\n" |
| 41639 | "#ifdef cl_khr_3d_image_writes\n" |
| 41640 | "int __ovld __cnfn get_image_width(write_only image3d_t image);\n" |
| 41641 | "#endif\n" |
| 41642 | "int __ovld __cnfn get_image_width(write_only image1d_array_t image);\n" |
| 41643 | "int __ovld __cnfn get_image_width(write_only image2d_array_t image);\n" |
| 41644 | "#ifdef cl_khr_depth_images\n" |
| 41645 | "int __ovld __cnfn get_image_width(write_only image2d_depth_t image);\n" |
| 41646 | "int __ovld __cnfn get_image_width(write_only image2d_array_depth_t image);\n" |
| 41647 | "#endif //cl_khr_depth_images\n" |
| 41648 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
| 41649 | "int __ovld __cnfn get_image_width(write_only image2d_msaa_t image);\n" |
| 41650 | "int __ovld __cnfn get_image_width(write_only image2d_msaa_depth_t image);\n" |
| 41651 | "int __ovld __cnfn get_image_width(write_only image2d_array_msaa_t image);\n" |
| 41652 | "int __ovld __cnfn get_image_width(write_only image2d_array_msaa_depth_t image);\n" |
| 41653 | "#endif //cl_khr_gl_msaa_sharing\n" |
| 41654 | "\n" |
| 41655 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 41656 | "int __ovld __cnfn get_image_width(read_write image1d_t image);\n" |
| 41657 | "int __ovld __cnfn get_image_width(read_write image1d_buffer_t image);\n" |
| 41658 | "int __ovld __cnfn get_image_width(read_write image2d_t image);\n" |
| 41659 | "int __ovld __cnfn get_image_width(read_write image3d_t image);\n" |
| 41660 | "int __ovld __cnfn get_image_width(read_write image1d_array_t image);\n" |
| 41661 | "int __ovld __cnfn get_image_width(read_write image2d_array_t image);\n" |
| 41662 | "#ifdef cl_khr_depth_images\n" |
| 41663 | "int __ovld __cnfn get_image_width(read_write image2d_depth_t image);\n" |
| 41664 | "int __ovld __cnfn get_image_width(read_write image2d_array_depth_t image);\n" |
| 41665 | "#endif //cl_khr_depth_images\n" |
| 41666 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
| 41667 | "int __ovld __cnfn get_image_width(read_write image2d_msaa_t image);\n" |
| 41668 | "int __ovld __cnfn get_image_width(read_write image2d_msaa_depth_t image);\n" |
| 41669 | "int __ovld __cnfn get_image_width(read_write image2d_array_msaa_t image);\n" |
| 41670 | "int __ovld __cnfn get_image_width(read_write image2d_array_msaa_depth_t image);\n" |
| 41671 | "#endif //cl_khr_gl_msaa_sharing\n" |
| 41672 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 41673 | "\n" |
| 41674 | "/**\n" |
| 41675 | " * Return the image height in pixels.\n" |
| 41676 | " */\n" |
| 41677 | "int __ovld __cnfn get_image_height(read_only image2d_t image);\n" |
| 41678 | "int __ovld __cnfn get_image_height(read_only image3d_t image);\n" |
| 41679 | "int __ovld __cnfn get_image_height(read_only image2d_array_t image);\n" |
| 41680 | "#ifdef cl_khr_depth_images\n" |
| 41681 | "int __ovld __cnfn get_image_height(read_only image2d_depth_t image);\n" |
| 41682 | "int __ovld __cnfn get_image_height(read_only image2d_array_depth_t image);\n" |
| 41683 | "#endif //cl_khr_depth_images\n" |
| 41684 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
| 41685 | "int __ovld __cnfn get_image_height(read_only image2d_msaa_t image);\n" |
| 41686 | "int __ovld __cnfn get_image_height(read_only image2d_msaa_depth_t image);\n" |
| 41687 | "int __ovld __cnfn get_image_height(read_only image2d_array_msaa_t image);\n" |
| 41688 | "int __ovld __cnfn get_image_height(read_only image2d_array_msaa_depth_t image);\n" |
| 41689 | "#endif //cl_khr_gl_msaa_sharing\n" |
| 41690 | "\n" |
| 41691 | "int __ovld __cnfn get_image_height(write_only image2d_t image);\n" |
| 41692 | "#ifdef cl_khr_3d_image_writes\n" |
| 41693 | "int __ovld __cnfn get_image_height(write_only image3d_t image);\n" |
| 41694 | "#endif\n" |
| 41695 | "int __ovld __cnfn get_image_height(write_only image2d_array_t image);\n" |
| 41696 | "#ifdef cl_khr_depth_images\n" |
| 41697 | "int __ovld __cnfn get_image_height(write_only image2d_depth_t image);\n" |
| 41698 | "int __ovld __cnfn get_image_height(write_only image2d_array_depth_t image);\n" |
| 41699 | "#endif //cl_khr_depth_images\n" |
| 41700 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
| 41701 | "int __ovld __cnfn get_image_height(write_only image2d_msaa_t image);\n" |
| 41702 | "int __ovld __cnfn get_image_height(write_only image2d_msaa_depth_t image);\n" |
| 41703 | "int __ovld __cnfn get_image_height(write_only image2d_array_msaa_t image);\n" |
| 41704 | "int __ovld __cnfn get_image_height(write_only image2d_array_msaa_depth_t image);\n" |
| 41705 | "#endif //cl_khr_gl_msaa_sharing\n" |
| 41706 | "\n" |
| 41707 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 41708 | "int __ovld __cnfn get_image_height(read_write image2d_t image);\n" |
| 41709 | "int __ovld __cnfn get_image_height(read_write image3d_t image);\n" |
| 41710 | "int __ovld __cnfn get_image_height(read_write image2d_array_t image);\n" |
| 41711 | "#ifdef cl_khr_depth_images\n" |
| 41712 | "int __ovld __cnfn get_image_height(read_write image2d_depth_t image);\n" |
| 41713 | "int __ovld __cnfn get_image_height(read_write image2d_array_depth_t image);\n" |
| 41714 | "#endif //cl_khr_depth_images\n" |
| 41715 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
| 41716 | "int __ovld __cnfn get_image_height(read_write image2d_msaa_t image);\n" |
| 41717 | "int __ovld __cnfn get_image_height(read_write image2d_msaa_depth_t image);\n" |
| 41718 | "int __ovld __cnfn get_image_height(read_write image2d_array_msaa_t image);\n" |
| 41719 | "int __ovld __cnfn get_image_height(read_write image2d_array_msaa_depth_t image);\n" |
| 41720 | "#endif //cl_khr_gl_msaa_sharing\n" |
| 41721 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 41722 | "\n" |
| 41723 | "/**\n" |
| 41724 | " * Return the image depth in pixels.\n" |
| 41725 | " */\n" |
| 41726 | "int __ovld __cnfn get_image_depth(read_only image3d_t image);\n" |
| 41727 | "\n" |
| 41728 | "#ifdef cl_khr_3d_image_writes\n" |
| 41729 | "int __ovld __cnfn get_image_depth(write_only image3d_t image);\n" |
| 41730 | "#endif\n" |
| 41731 | "\n" |
| 41732 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 41733 | "int __ovld __cnfn get_image_depth(read_write image3d_t image);\n" |
| 41734 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 41735 | "\n" |
| 41736 | "// OpenCL Extension v2.0 s9.18 - Mipmaps\n" |
| 41737 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 41738 | "#ifdef cl_khr_mipmap_image\n" |
| 41739 | "/**\n" |
| 41740 | " * Return the image miplevels.\n" |
| 41741 | " */\n" |
| 41742 | "\n" |
| 41743 | "int __ovld get_image_num_mip_levels(read_only image1d_t image);\n" |
| 41744 | "int __ovld get_image_num_mip_levels(read_only image2d_t image);\n" |
| 41745 | "int __ovld get_image_num_mip_levels(read_only image3d_t image);\n" |
| 41746 | "\n" |
| 41747 | "int __ovld get_image_num_mip_levels(write_only image1d_t image);\n" |
| 41748 | "int __ovld get_image_num_mip_levels(write_only image2d_t image);\n" |
| 41749 | "#ifdef cl_khr_3d_image_writes\n" |
| 41750 | "int __ovld get_image_num_mip_levels(write_only image3d_t image);\n" |
| 41751 | "#endif\n" |
| 41752 | "\n" |
| 41753 | "int __ovld get_image_num_mip_levels(read_write image1d_t image);\n" |
| 41754 | "int __ovld get_image_num_mip_levels(read_write image2d_t image);\n" |
| 41755 | "int __ovld get_image_num_mip_levels(read_write image3d_t image);\n" |
| 41756 | "\n" |
| 41757 | "int __ovld get_image_num_mip_levels(read_only image1d_array_t image);\n" |
| 41758 | "int __ovld get_image_num_mip_levels(read_only image2d_array_t image);\n" |
| 41759 | "int __ovld get_image_num_mip_levels(read_only image2d_array_depth_t image);\n" |
| 41760 | "int __ovld get_image_num_mip_levels(read_only image2d_depth_t image);\n" |
| 41761 | "\n" |
| 41762 | "int __ovld get_image_num_mip_levels(write_only image1d_array_t image);\n" |
| 41763 | "int __ovld get_image_num_mip_levels(write_only image2d_array_t image);\n" |
| 41764 | "int __ovld get_image_num_mip_levels(write_only image2d_array_depth_t image);\n" |
| 41765 | "int __ovld get_image_num_mip_levels(write_only image2d_depth_t image);\n" |
| 41766 | "\n" |
| 41767 | "int __ovld get_image_num_mip_levels(read_write image1d_array_t image);\n" |
| 41768 | "int __ovld get_image_num_mip_levels(read_write image2d_array_t image);\n" |
| 41769 | "int __ovld get_image_num_mip_levels(read_write image2d_array_depth_t image);\n" |
| 41770 | "int __ovld get_image_num_mip_levels(read_write image2d_depth_t image);\n" |
| 41771 | "\n" |
| 41772 | "#endif //cl_khr_mipmap_image\n" |
| 41773 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 41774 | "\n" |
| 41775 | "/**\n" |
| 41776 | " * Return the channel data type. Valid values are:\n" |
| 41777 | " * CLK_SNORM_INT8\n" |
| 41778 | " * CLK_SNORM_INT16\n" |
| 41779 | " * CLK_UNORM_INT8\n" |
| 41780 | " * CLK_UNORM_INT16\n" |
| 41781 | " * CLK_UNORM_SHORT_565\n" |
| 41782 | " * CLK_UNORM_SHORT_555\n" |
| 41783 | " * CLK_UNORM_SHORT_101010\n" |
| 41784 | " * CLK_SIGNED_INT8\n" |
| 41785 | " * CLK_SIGNED_INT16\n" |
| 41786 | " * CLK_SIGNED_INT32\n" |
| 41787 | " * CLK_UNSIGNED_INT8\n" |
| 41788 | " * CLK_UNSIGNED_INT16\n" |
| 41789 | " * CLK_UNSIGNED_INT32\n" |
| 41790 | " * CLK_HALF_FLOAT\n" |
| 41791 | " * CLK_FLOAT\n" |
| 41792 | " */\n" |
| 41793 | "\n" |
| 41794 | "//\n" |
| 41795 | "// Channel Datatype.\n" |
| 41796 | "//\n" |
| 41797 | "#define CLK_SNORM_INT8 0x10D0\n" |
| 41798 | "#define CLK_SNORM_INT16 0x10D1\n" |
| 41799 | "#define CLK_UNORM_INT8 0x10D2\n" |
| 41800 | "#define CLK_UNORM_INT16 0x10D3\n" |
| 41801 | "#define CLK_UNORM_SHORT_565 0x10D4\n" |
| 41802 | "#define CLK_UNORM_SHORT_555 0x10D5\n" |
| 41803 | "#define CLK_UNORM_INT_101010 0x10D6\n" |
| 41804 | "#define CLK_SIGNED_INT8 0x10D7\n" |
| 41805 | "#define CLK_SIGNED_INT16 0x10D8\n" |
| 41806 | "#define CLK_SIGNED_INT32 0x10D9\n" |
| 41807 | "#define CLK_UNSIGNED_INT8 0x10DA\n" |
| 41808 | "#define CLK_UNSIGNED_INT16 0x10DB\n" |
| 41809 | "#define CLK_UNSIGNED_INT32 0x10DC\n" |
| 41810 | "#define CLK_HALF_FLOAT 0x10DD\n" |
| 41811 | "#define CLK_FLOAT 0x10DE\n" |
| 41812 | "#define CLK_UNORM_INT24 0x10DF\n" |
| 41813 | "\n" |
| 41814 | "int __ovld __cnfn get_image_channel_data_type(read_only image1d_t image);\n" |
| 41815 | "int __ovld __cnfn get_image_channel_data_type(read_only image1d_buffer_t image);\n" |
| 41816 | "int __ovld __cnfn get_image_channel_data_type(read_only image2d_t image);\n" |
| 41817 | "int __ovld __cnfn get_image_channel_data_type(read_only image3d_t image);\n" |
| 41818 | "int __ovld __cnfn get_image_channel_data_type(read_only image1d_array_t image);\n" |
| 41819 | "int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_t image);\n" |
| 41820 | "#ifdef cl_khr_depth_images\n" |
| 41821 | "int __ovld __cnfn get_image_channel_data_type(read_only image2d_depth_t image);\n" |
| 41822 | "int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_depth_t image);\n" |
| 41823 | "#endif //cl_khr_depth_images\n" |
| 41824 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
| 41825 | "int __ovld __cnfn get_image_channel_data_type(read_only image2d_msaa_t image);\n" |
| 41826 | "int __ovld __cnfn get_image_channel_data_type(read_only image2d_msaa_depth_t image);\n" |
| 41827 | "int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_msaa_t image);\n" |
| 41828 | "int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_msaa_depth_t image);\n" |
| 41829 | "#endif //cl_khr_gl_msaa_sharing\n" |
| 41830 | "\n" |
| 41831 | "int __ovld __cnfn get_image_channel_data_type(write_only image1d_t image);\n" |
| 41832 | "int __ovld __cnfn get_image_channel_data_type(write_only image1d_buffer_t image);\n" |
| 41833 | "int __ovld __cnfn get_image_channel_data_type(write_only image2d_t image);\n" |
| 41834 | "#ifdef cl_khr_3d_image_writes\n" |
| 41835 | "int __ovld __cnfn get_image_channel_data_type(write_only image3d_t image);\n" |
| 41836 | "#endif\n" |
| 41837 | "int __ovld __cnfn get_image_channel_data_type(write_only image1d_array_t image);\n" |
| 41838 | "int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_t image);\n" |
| 41839 | "#ifdef cl_khr_depth_images\n" |
| 41840 | "int __ovld __cnfn get_image_channel_data_type(write_only image2d_depth_t image);\n" |
| 41841 | "int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_depth_t image);\n" |
| 41842 | "#endif //cl_khr_depth_images\n" |
| 41843 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
| 41844 | "int __ovld __cnfn get_image_channel_data_type(write_only image2d_msaa_t image);\n" |
| 41845 | "int __ovld __cnfn get_image_channel_data_type(write_only image2d_msaa_depth_t image);\n" |
| 41846 | "int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_t image);\n" |
| 41847 | "int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_depth_t image);\n" |
| 41848 | "#endif //cl_khr_gl_msaa_sharing\n" |
| 41849 | "\n" |
| 41850 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 41851 | "int __ovld __cnfn get_image_channel_data_type(read_write image1d_t image);\n" |
| 41852 | "int __ovld __cnfn get_image_channel_data_type(read_write image1d_buffer_t image);\n" |
| 41853 | "int __ovld __cnfn get_image_channel_data_type(read_write image2d_t image);\n" |
| 41854 | "int __ovld __cnfn get_image_channel_data_type(read_write image3d_t image);\n" |
| 41855 | "int __ovld __cnfn get_image_channel_data_type(read_write image1d_array_t image);\n" |
| 41856 | "int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_t image);\n" |
| 41857 | "#ifdef cl_khr_depth_images\n" |
| 41858 | "int __ovld __cnfn get_image_channel_data_type(read_write image2d_depth_t image);\n" |
| 41859 | "int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_depth_t image);\n" |
| 41860 | "#endif //cl_khr_depth_images\n" |
| 41861 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
| 41862 | "int __ovld __cnfn get_image_channel_data_type(read_write image2d_msaa_t image);\n" |
| 41863 | "int __ovld __cnfn get_image_channel_data_type(read_write image2d_msaa_depth_t image);\n" |
| 41864 | "int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_t image);\n" |
| 41865 | "int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_depth_t image);\n" |
| 41866 | "#endif //cl_khr_gl_msaa_sharing\n" |
| 41867 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 41868 | "\n" |
| 41869 | "/**\n" |
| 41870 | " * Return the image channel order. Valid values are:\n" |
| 41871 | " * CLK_A\n" |
| 41872 | " * CLK_R\n" |
| 41873 | " * CLK_Rx\n" |
| 41874 | " * CLK_RG\n" |
| 41875 | " * CLK_RGx\n" |
| 41876 | " * CLK_RA\n" |
| 41877 | " * CLK_RGB\n" |
| 41878 | " * CLK_RGBx\n" |
| 41879 | " * CLK_RGBA\n" |
| 41880 | " * CLK_ARGB\n" |
| 41881 | " * CLK_BGRA\n" |
| 41882 | " * CLK_INTENSITY\n" |
| 41883 | " * CLK_LUMINANCE\n" |
| 41884 | " */\n" |
| 41885 | "// Channel order, numbering must be aligned with cl_channel_order in cl.h\n" |
| 41886 | "//\n" |
| 41887 | "#define CLK_R 0x10B0\n" |
| 41888 | "#define CLK_A 0x10B1\n" |
| 41889 | "#define CLK_RG 0x10B2\n" |
| 41890 | "#define CLK_RA 0x10B3\n" |
| 41891 | "#define CLK_RGB 0x10B4\n" |
| 41892 | "#define CLK_RGBA 0x10B5\n" |
| 41893 | "#define CLK_BGRA 0x10B6\n" |
| 41894 | "#define CLK_ARGB 0x10B7\n" |
| 41895 | "#define CLK_INTENSITY 0x10B8\n" |
| 41896 | "#define CLK_LUMINANCE 0x10B9\n" |
| 41897 | "#define CLK_Rx 0x10BA\n" |
| 41898 | "#define CLK_RGx 0x10BB\n" |
| 41899 | "#define CLK_RGBx 0x10BC\n" |
| 41900 | "#define CLK_DEPTH 0x10BD\n" |
| 41901 | "#define CLK_DEPTH_STENCIL 0x10BE\n" |
| 41902 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 41903 | "#define CLK_sRGB 0x10BF\n" |
| 41904 | "#define CLK_sRGBx 0x10C0\n" |
| 41905 | "#define CLK_sRGBA 0x10C1\n" |
| 41906 | "#define CLK_sBGRA 0x10C2\n" |
| 41907 | "#define CLK_ABGR 0x10C3\n" |
| 41908 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 41909 | "\n" |
| 41910 | "int __ovld __cnfn get_image_channel_order(read_only image1d_t image);\n" |
| 41911 | "int __ovld __cnfn get_image_channel_order(read_only image1d_buffer_t image);\n" |
| 41912 | "int __ovld __cnfn get_image_channel_order(read_only image2d_t image);\n" |
| 41913 | "int __ovld __cnfn get_image_channel_order(read_only image3d_t image);\n" |
| 41914 | "int __ovld __cnfn get_image_channel_order(read_only image1d_array_t image);\n" |
| 41915 | "int __ovld __cnfn get_image_channel_order(read_only image2d_array_t image);\n" |
| 41916 | "#ifdef cl_khr_depth_images\n" |
| 41917 | "int __ovld __cnfn get_image_channel_order(read_only image2d_depth_t image);\n" |
| 41918 | "int __ovld __cnfn get_image_channel_order(read_only image2d_array_depth_t image);\n" |
| 41919 | "#endif //cl_khr_depth_images\n" |
| 41920 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
| 41921 | "int __ovld __cnfn get_image_channel_order(read_only image2d_msaa_t image);\n" |
| 41922 | "int __ovld __cnfn get_image_channel_order(read_only image2d_msaa_depth_t image);\n" |
| 41923 | "int __ovld __cnfn get_image_channel_order(read_only image2d_array_msaa_t image);\n" |
| 41924 | "int __ovld __cnfn get_image_channel_order(read_only image2d_array_msaa_depth_t image);\n" |
| 41925 | "#endif //cl_khr_gl_msaa_sharing\n" |
| 41926 | "\n" |
| 41927 | "int __ovld __cnfn get_image_channel_order(write_only image1d_t image);\n" |
| 41928 | "int __ovld __cnfn get_image_channel_order(write_only image1d_buffer_t image);\n" |
| 41929 | "int __ovld __cnfn get_image_channel_order(write_only image2d_t image);\n" |
| 41930 | "#ifdef cl_khr_3d_image_writes\n" |
| 41931 | "int __ovld __cnfn get_image_channel_order(write_only image3d_t image);\n" |
| 41932 | "#endif\n" |
| 41933 | "int __ovld __cnfn get_image_channel_order(write_only image1d_array_t image);\n" |
| 41934 | "int __ovld __cnfn get_image_channel_order(write_only image2d_array_t image);\n" |
| 41935 | "#ifdef cl_khr_depth_images\n" |
| 41936 | "int __ovld __cnfn get_image_channel_order(write_only image2d_depth_t image);\n" |
| 41937 | "int __ovld __cnfn get_image_channel_order(write_only image2d_array_depth_t image);\n" |
| 41938 | "#endif //cl_khr_depth_images\n" |
| 41939 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
| 41940 | "int __ovld __cnfn get_image_channel_order(write_only image2d_msaa_t image);\n" |
| 41941 | "int __ovld __cnfn get_image_channel_order(write_only image2d_msaa_depth_t image);\n" |
| 41942 | "int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_t image);\n" |
| 41943 | "int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_depth_t image);\n" |
| 41944 | "#endif //cl_khr_gl_msaa_sharing\n" |
| 41945 | "\n" |
| 41946 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 41947 | "int __ovld __cnfn get_image_channel_order(read_write image1d_t image);\n" |
| 41948 | "int __ovld __cnfn get_image_channel_order(read_write image1d_buffer_t image);\n" |
| 41949 | "int __ovld __cnfn get_image_channel_order(read_write image2d_t image);\n" |
| 41950 | "int __ovld __cnfn get_image_channel_order(read_write image3d_t image);\n" |
| 41951 | "int __ovld __cnfn get_image_channel_order(read_write image1d_array_t image);\n" |
| 41952 | "int __ovld __cnfn get_image_channel_order(read_write image2d_array_t image);\n" |
| 41953 | "#ifdef cl_khr_depth_images\n" |
| 41954 | "int __ovld __cnfn get_image_channel_order(read_write image2d_depth_t image);\n" |
| 41955 | "int __ovld __cnfn get_image_channel_order(read_write image2d_array_depth_t image);\n" |
| 41956 | "#endif //cl_khr_depth_images\n" |
| 41957 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
| 41958 | "int __ovld __cnfn get_image_channel_order(read_write image2d_msaa_t image);\n" |
| 41959 | "int __ovld __cnfn get_image_channel_order(read_write image2d_msaa_depth_t image);\n" |
| 41960 | "int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_t image);\n" |
| 41961 | "int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_depth_t image);\n" |
| 41962 | "#endif //cl_khr_gl_msaa_sharing\n" |
| 41963 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 41964 | "\n" |
| 41965 | "/**\n" |
| 41966 | " * Return the 2D image width and height as an int2\n" |
| 41967 | " * type. The width is returned in the x component, and\n" |
| 41968 | " * the height in the y component.\n" |
| 41969 | " */\n" |
| 41970 | "int2 __ovld __cnfn get_image_dim(read_only image2d_t image);\n" |
| 41971 | "int2 __ovld __cnfn get_image_dim(read_only image2d_array_t image);\n" |
| 41972 | "#ifdef cl_khr_depth_images\n" |
| 41973 | "int2 __ovld __cnfn get_image_dim(read_only image2d_array_depth_t image);\n" |
| 41974 | "int2 __ovld __cnfn get_image_dim(read_only image2d_depth_t image);\n" |
| 41975 | "#endif //cl_khr_depth_images\n" |
| 41976 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
| 41977 | "int2 __ovld __cnfn get_image_dim(read_only image2d_msaa_t image);\n" |
| 41978 | "int2 __ovld __cnfn get_image_dim(read_only image2d_msaa_depth_t image);\n" |
| 41979 | "int2 __ovld __cnfn get_image_dim(read_only image2d_array_msaa_t image);\n" |
| 41980 | "int2 __ovld __cnfn get_image_dim(read_only image2d_array_msaa_depth_t image);\n" |
| 41981 | "#endif //cl_khr_gl_msaa_sharing\n" |
| 41982 | "\n" |
| 41983 | "int2 __ovld __cnfn get_image_dim(write_only image2d_t image);\n" |
| 41984 | "int2 __ovld __cnfn get_image_dim(write_only image2d_array_t image);\n" |
| 41985 | "#ifdef cl_khr_depth_images\n" |
| 41986 | "int2 __ovld __cnfn get_image_dim(write_only image2d_array_depth_t image);\n" |
| 41987 | "int2 __ovld __cnfn get_image_dim(write_only image2d_depth_t image);\n" |
| 41988 | "#endif //cl_khr_depth_images\n" |
| 41989 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
| 41990 | "int2 __ovld __cnfn get_image_dim(write_only image2d_msaa_t image);\n" |
| 41991 | "int2 __ovld __cnfn get_image_dim(write_only image2d_msaa_depth_t image);\n" |
| 41992 | "int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_t image);\n" |
| 41993 | "int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_depth_t image);\n" |
| 41994 | "#endif //cl_khr_gl_msaa_sharing\n" |
| 41995 | "\n" |
| 41996 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 41997 | "int2 __ovld __cnfn get_image_dim(read_write image2d_t image);\n" |
| 41998 | "int2 __ovld __cnfn get_image_dim(read_write image2d_array_t image);\n" |
| 41999 | "#ifdef cl_khr_depth_images\n" |
| 42000 | "int2 __ovld __cnfn get_image_dim(read_write image2d_array_depth_t image);\n" |
| 42001 | "int2 __ovld __cnfn get_image_dim(read_write image2d_depth_t image);\n" |
| 42002 | "#endif //cl_khr_depth_images\n" |
| 42003 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
| 42004 | "int2 __ovld __cnfn get_image_dim(read_write image2d_msaa_t image);\n" |
| 42005 | "int2 __ovld __cnfn get_image_dim(read_write image2d_msaa_depth_t image);\n" |
| 42006 | "int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_t image);\n" |
| 42007 | "int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_depth_t image);\n" |
| 42008 | "#endif //cl_khr_gl_msaa_sharing\n" |
| 42009 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 42010 | "\n" |
| 42011 | "/**\n" |
| 42012 | " * Return the 3D image width, height, and depth as an\n" |
| 42013 | " * int4 type. The width is returned in the x\n" |
| 42014 | " * component, height in the y component, depth in the z\n" |
| 42015 | " * component and the w component is 0.\n" |
| 42016 | " */\n" |
| 42017 | "int4 __ovld __cnfn get_image_dim(read_only image3d_t image);\n" |
| 42018 | "#ifdef cl_khr_3d_image_writes\n" |
| 42019 | "int4 __ovld __cnfn get_image_dim(write_only image3d_t image);\n" |
| 42020 | "#endif\n" |
| 42021 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 42022 | "int4 __ovld __cnfn get_image_dim(read_write image3d_t image);\n" |
| 42023 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 42024 | "\n" |
| 42025 | "/**\n" |
| 42026 | " * Return the image array size.\n" |
| 42027 | " */\n" |
| 42028 | "\n" |
| 42029 | "size_t __ovld __cnfn get_image_array_size(read_only image1d_array_t image_array);\n" |
| 42030 | "size_t __ovld __cnfn get_image_array_size(read_only image2d_array_t image_array);\n" |
| 42031 | "#ifdef cl_khr_depth_images\n" |
| 42032 | "size_t __ovld __cnfn get_image_array_size(read_only image2d_array_depth_t image_array);\n" |
| 42033 | "#endif //cl_khr_depth_images\n" |
| 42034 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
| 42035 | "size_t __ovld __cnfn get_image_array_size(read_only image2d_array_msaa_t image_array);\n" |
| 42036 | "size_t __ovld __cnfn get_image_array_size(read_only image2d_array_msaa_depth_t image_array);\n" |
| 42037 | "#endif //cl_khr_gl_msaa_sharing\n" |
| 42038 | "\n" |
| 42039 | "size_t __ovld __cnfn get_image_array_size(write_only image1d_array_t image_array);\n" |
| 42040 | "size_t __ovld __cnfn get_image_array_size(write_only image2d_array_t image_array);\n" |
| 42041 | "#ifdef cl_khr_depth_images\n" |
| 42042 | "size_t __ovld __cnfn get_image_array_size(write_only image2d_array_depth_t image_array);\n" |
| 42043 | "#endif //cl_khr_depth_images\n" |
| 42044 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
| 42045 | "size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_t image_array);\n" |
| 42046 | "size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_depth_t image_array);\n" |
| 42047 | "#endif //cl_khr_gl_msaa_sharing\n" |
| 42048 | "\n" |
| 42049 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 42050 | "size_t __ovld __cnfn get_image_array_size(read_write image1d_array_t image_array);\n" |
| 42051 | "size_t __ovld __cnfn get_image_array_size(read_write image2d_array_t image_array);\n" |
| 42052 | "#ifdef cl_khr_depth_images\n" |
| 42053 | "size_t __ovld __cnfn get_image_array_size(read_write image2d_array_depth_t image_array);\n" |
| 42054 | "#endif //cl_khr_depth_images\n" |
| 42055 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
| 42056 | "size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_t image_array);\n" |
| 42057 | "size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_depth_t image_array);\n" |
| 42058 | "#endif //cl_khr_gl_msaa_sharing\n" |
| 42059 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 42060 | "\n" |
| 42061 | "/**\n" |
| 42062 | "* Return the number of samples associated with image\n" |
| 42063 | "*/\n" |
| 42064 | "#if defined(cl_khr_gl_msaa_sharing)\n" |
| 42065 | "int __ovld get_image_num_samples(read_only image2d_msaa_t image);\n" |
| 42066 | "int __ovld get_image_num_samples(read_only image2d_msaa_depth_t image);\n" |
| 42067 | "int __ovld get_image_num_samples(read_only image2d_array_msaa_depth_t image);\n" |
| 42068 | "int __ovld get_image_num_samples(read_only image2d_array_msaa_t image);\n" |
| 42069 | "int __ovld get_image_num_samples(read_only image2d_array_msaa_depth_t image);\n" |
| 42070 | "\n" |
| 42071 | "int __ovld get_image_num_samples(write_only image2d_msaa_t image);\n" |
| 42072 | "int __ovld get_image_num_samples(write_only image2d_msaa_depth_t image);\n" |
| 42073 | "int __ovld get_image_num_samples(write_only image2d_array_msaa_depth_t image);\n" |
| 42074 | "int __ovld get_image_num_samples(write_only image2d_array_msaa_t image);\n" |
| 42075 | "int __ovld get_image_num_samples(write_only image2d_array_msaa_depth_t image);\n" |
| 42076 | "\n" |
| 42077 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 42078 | "int __ovld get_image_num_samples(read_write image2d_msaa_t image);\n" |
| 42079 | "int __ovld get_image_num_samples(read_write image2d_msaa_depth_t image);\n" |
| 42080 | "int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image);\n" |
| 42081 | "int __ovld get_image_num_samples(read_write image2d_array_msaa_t image);\n" |
| 42082 | "int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image);\n" |
| 42083 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 42084 | "#endif\n" |
| 42085 | "\n" |
| 42086 | "// OpenCL v2.0 s6.13.15 - Work-group Functions\n" |
| 42087 | "\n" |
| 42088 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 42089 | "int __ovld __conv work_group_all(int predicate);\n" |
| 42090 | "int __ovld __conv work_group_any(int predicate);\n" |
| 42091 | "\n" |
| 42092 | "#ifdef cl_khr_fp16\n" |
| 42093 | "half __ovld __conv work_group_broadcast(half a, size_t local_id);\n" |
| 42094 | "half __ovld __conv work_group_broadcast(half a, size_t x, size_t y);\n" |
| 42095 | "half __ovld __conv work_group_broadcast(half a, size_t x, size_t y, size_t z);\n" |
| 42096 | "#endif\n" |
| 42097 | "int __ovld __conv work_group_broadcast(int a, size_t local_id);\n" |
| 42098 | "int __ovld __conv work_group_broadcast(int a, size_t x, size_t y);\n" |
| 42099 | "int __ovld __conv work_group_broadcast(int a, size_t x, size_t y, size_t z);\n" |
| 42100 | "uint __ovld __conv work_group_broadcast(uint a, size_t local_id);\n" |
| 42101 | "uint __ovld __conv work_group_broadcast(uint a, size_t x, size_t y);\n" |
| 42102 | "uint __ovld __conv work_group_broadcast(uint a, size_t x, size_t y, size_t z);\n" |
| 42103 | "long __ovld __conv work_group_broadcast(long a, size_t local_id);\n" |
| 42104 | "long __ovld __conv work_group_broadcast(long a, size_t x, size_t y);\n" |
| 42105 | "long __ovld __conv work_group_broadcast(long a, size_t x, size_t y, size_t z);\n" |
| 42106 | "ulong __ovld __conv work_group_broadcast(ulong a, size_t local_id);\n" |
| 42107 | "ulong __ovld __conv work_group_broadcast(ulong a, size_t x, size_t y);\n" |
| 42108 | "ulong __ovld __conv work_group_broadcast(ulong a, size_t x, size_t y, size_t z);\n" |
| 42109 | "float __ovld __conv work_group_broadcast(float a, size_t local_id);\n" |
| 42110 | "float __ovld __conv work_group_broadcast(float a, size_t x, size_t y);\n" |
| 42111 | "float __ovld __conv work_group_broadcast(float a, size_t x, size_t y, size_t z);\n" |
| 42112 | "#ifdef cl_khr_fp64\n" |
| 42113 | "double __ovld __conv work_group_broadcast(double a, size_t local_id);\n" |
| 42114 | "double __ovld __conv work_group_broadcast(double a, size_t x, size_t y);\n" |
| 42115 | "double __ovld __conv work_group_broadcast(double a, size_t x, size_t y, size_t z);\n" |
| 42116 | "#endif //cl_khr_fp64\n" |
| 42117 | "\n" |
| 42118 | "#ifdef cl_khr_fp16\n" |
| 42119 | "half __ovld __conv work_group_reduce_add(half x);\n" |
| 42120 | "half __ovld __conv work_group_reduce_min(half x);\n" |
| 42121 | "half __ovld __conv work_group_reduce_max(half x);\n" |
| 42122 | "half __ovld __conv work_group_scan_exclusive_add(half x);\n" |
| 42123 | "half __ovld __conv work_group_scan_exclusive_min(half x);\n" |
| 42124 | "half __ovld __conv work_group_scan_exclusive_max(half x);\n" |
| 42125 | "half __ovld __conv work_group_scan_inclusive_add(half x);\n" |
| 42126 | "half __ovld __conv work_group_scan_inclusive_min(half x);\n" |
| 42127 | "half __ovld __conv work_group_scan_inclusive_max(half x);\n" |
| 42128 | "#endif\n" |
| 42129 | "int __ovld __conv work_group_reduce_add(int x);\n" |
| 42130 | "int __ovld __conv work_group_reduce_min(int x);\n" |
| 42131 | "int __ovld __conv work_group_reduce_max(int x);\n" |
| 42132 | "int __ovld __conv work_group_scan_exclusive_add(int x);\n" |
| 42133 | "int __ovld __conv work_group_scan_exclusive_min(int x);\n" |
| 42134 | "int __ovld __conv work_group_scan_exclusive_max(int x);\n" |
| 42135 | "int __ovld __conv work_group_scan_inclusive_add(int x);\n" |
| 42136 | "int __ovld __conv work_group_scan_inclusive_min(int x);\n" |
| 42137 | "int __ovld __conv work_group_scan_inclusive_max(int x);\n" |
| 42138 | "uint __ovld __conv work_group_reduce_add(uint x);\n" |
| 42139 | "uint __ovld __conv work_group_reduce_min(uint x);\n" |
| 42140 | "uint __ovld __conv work_group_reduce_max(uint x);\n" |
| 42141 | "uint __ovld __conv work_group_scan_exclusive_add(uint x);\n" |
| 42142 | "uint __ovld __conv work_group_scan_exclusive_min(uint x);\n" |
| 42143 | "uint __ovld __conv work_group_scan_exclusive_max(uint x);\n" |
| 42144 | "uint __ovld __conv work_group_scan_inclusive_add(uint x);\n" |
| 42145 | "uint __ovld __conv work_group_scan_inclusive_min(uint x);\n" |
| 42146 | "uint __ovld __conv work_group_scan_inclusive_max(uint x);\n" |
| 42147 | "long __ovld __conv work_group_reduce_add(long x);\n" |
| 42148 | "long __ovld __conv work_group_reduce_min(long x);\n" |
| 42149 | "long __ovld __conv work_group_reduce_max(long x);\n" |
| 42150 | "long __ovld __conv work_group_scan_exclusive_add(long x);\n" |
| 42151 | "long __ovld __conv work_group_scan_exclusive_min(long x);\n" |
| 42152 | "long __ovld __conv work_group_scan_exclusive_max(long x);\n" |
| 42153 | "long __ovld __conv work_group_scan_inclusive_add(long x);\n" |
| 42154 | "long __ovld __conv work_group_scan_inclusive_min(long x);\n" |
| 42155 | "long __ovld __conv work_group_scan_inclusive_max(long x);\n" |
| 42156 | "ulong __ovld __conv work_group_reduce_add(ulong x);\n" |
| 42157 | "ulong __ovld __conv work_group_reduce_min(ulong x);\n" |
| 42158 | "ulong __ovld __conv work_group_reduce_max(ulong x);\n" |
| 42159 | "ulong __ovld __conv work_group_scan_exclusive_add(ulong x);\n" |
| 42160 | "ulong __ovld __conv work_group_scan_exclusive_min(ulong x);\n" |
| 42161 | "ulong __ovld __conv work_group_scan_exclusive_max(ulong x);\n" |
| 42162 | "ulong __ovld __conv work_group_scan_inclusive_add(ulong x);\n" |
| 42163 | "ulong __ovld __conv work_group_scan_inclusive_min(ulong x);\n" |
| 42164 | "ulong __ovld __conv work_group_scan_inclusive_max(ulong x);\n" |
| 42165 | "float __ovld __conv work_group_reduce_add(float x);\n" |
| 42166 | "float __ovld __conv work_group_reduce_min(float x);\n" |
| 42167 | "float __ovld __conv work_group_reduce_max(float x);\n" |
| 42168 | "float __ovld __conv work_group_scan_exclusive_add(float x);\n" |
| 42169 | "float __ovld __conv work_group_scan_exclusive_min(float x);\n" |
| 42170 | "float __ovld __conv work_group_scan_exclusive_max(float x);\n" |
| 42171 | "float __ovld __conv work_group_scan_inclusive_add(float x);\n" |
| 42172 | "float __ovld __conv work_group_scan_inclusive_min(float x);\n" |
| 42173 | "float __ovld __conv work_group_scan_inclusive_max(float x);\n" |
| 42174 | "#ifdef cl_khr_fp64\n" |
| 42175 | "double __ovld __conv work_group_reduce_add(double x);\n" |
| 42176 | "double __ovld __conv work_group_reduce_min(double x);\n" |
| 42177 | "double __ovld __conv work_group_reduce_max(double x);\n" |
| 42178 | "double __ovld __conv work_group_scan_exclusive_add(double x);\n" |
| 42179 | "double __ovld __conv work_group_scan_exclusive_min(double x);\n" |
| 42180 | "double __ovld __conv work_group_scan_exclusive_max(double x);\n" |
| 42181 | "double __ovld __conv work_group_scan_inclusive_add(double x);\n" |
| 42182 | "double __ovld __conv work_group_scan_inclusive_min(double x);\n" |
| 42183 | "double __ovld __conv work_group_scan_inclusive_max(double x);\n" |
| 42184 | "#endif //cl_khr_fp64\n" |
| 42185 | "\n" |
| 42186 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 42187 | "\n" |
| 42188 | "// OpenCL v2.0 s6.13.16 - Pipe Functions\n" |
| 42189 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 42190 | "#define PIPE_RESERVE_ID_VALID_BIT (1U << 30)\n" |
| 42191 | "#define CLK_NULL_RESERVE_ID (__builtin_astype(((void*)(__SIZE_MAX__)), reserve_id_t))\n" |
| 42192 | "bool __ovld is_valid_reserve_id(reserve_id_t reserve_id);\n" |
| 42193 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 42194 | "\n" |
| 42195 | "\n" |
| 42196 | "// OpenCL v2.0 s6.13.17 - Enqueue Kernels\n" |
| 42197 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 42198 | "\n" |
| 42199 | "#define CL_COMPLETE 0x0\n" |
| 42200 | "#define CL_RUNNING 0x1\n" |
| 42201 | "#define CL_SUBMITTED 0x2\n" |
| 42202 | "#define CL_QUEUED 0x3\n" |
| 42203 | "\n" |
| 42204 | "#define CLK_SUCCESS 0\n" |
| 42205 | "#define CLK_ENQUEUE_FAILURE -101\n" |
| 42206 | "#define CLK_INVALID_QUEUE -102\n" |
| 42207 | "#define CLK_INVALID_NDRANGE -160\n" |
| 42208 | "#define CLK_INVALID_EVENT_WAIT_LIST -57\n" |
| 42209 | "#define CLK_DEVICE_QUEUE_FULL -161\n" |
| 42210 | "#define CLK_INVALID_ARG_SIZE -51\n" |
| 42211 | "#define CLK_EVENT_ALLOCATION_FAILURE -100\n" |
| 42212 | "#define CLK_OUT_OF_RESOURCES -5\n" |
| 42213 | "\n" |
| 42214 | "#define CLK_NULL_QUEUE 0\n" |
| 42215 | "#define CLK_NULL_EVENT (__builtin_astype(((void*)(__SIZE_MAX__)), clk_event_t))\n" |
| 42216 | "\n" |
| 42217 | "// execution model related definitions\n" |
| 42218 | "#define CLK_ENQUEUE_FLAGS_NO_WAIT 0x0\n" |
| 42219 | "#define CLK_ENQUEUE_FLAGS_WAIT_KERNEL 0x1\n" |
| 42220 | "#define CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP 0x2\n" |
| 42221 | "\n" |
| 42222 | "typedef int kernel_enqueue_flags_t;\n" |
| 42223 | "typedef int clk_profiling_info;\n" |
| 42224 | "\n" |
| 42225 | "// Profiling info name (see capture_event_profiling_info)\n" |
| 42226 | "#define CLK_PROFILING_COMMAND_EXEC_TIME 0x1\n" |
| 42227 | "\n" |
| 42228 | "#define MAX_WORK_DIM 3\n" |
| 42229 | "\n" |
| 42230 | "typedef struct {\n" |
| 42231 | " unsigned int workDimension;\n" |
| 42232 | " size_t globalWorkOffset[MAX_WORK_DIM];\n" |
| 42233 | " size_t globalWorkSize[MAX_WORK_DIM];\n" |
| 42234 | " size_t localWorkSize[MAX_WORK_DIM];\n" |
| 42235 | "} ndrange_t;\n" |
| 42236 | "\n" |
| 42237 | "ndrange_t __ovld ndrange_1D(size_t);\n" |
| 42238 | "ndrange_t __ovld ndrange_1D(size_t, size_t);\n" |
| 42239 | "ndrange_t __ovld ndrange_1D(size_t, size_t, size_t);\n" |
| 42240 | "\n" |
| 42241 | "ndrange_t __ovld ndrange_2D(const size_t[2]);\n" |
| 42242 | "ndrange_t __ovld ndrange_2D(const size_t[2], const size_t[2]);\n" |
| 42243 | "ndrange_t __ovld ndrange_2D(const size_t[2], const size_t[2], const size_t[2]);\n" |
| 42244 | "\n" |
| 42245 | "ndrange_t __ovld ndrange_3D(const size_t[3]);\n" |
| 42246 | "ndrange_t __ovld ndrange_3D(const size_t[3], const size_t[3]);\n" |
| 42247 | "ndrange_t __ovld ndrange_3D(const size_t[3], const size_t[3], const size_t[3]);\n" |
| 42248 | "\n" |
| 42249 | "int __ovld enqueue_marker(queue_t, uint, const __private clk_event_t*, __private clk_event_t*);\n" |
| 42250 | "\n" |
| 42251 | "void __ovld retain_event(clk_event_t);\n" |
| 42252 | "\n" |
| 42253 | "void __ovld release_event(clk_event_t);\n" |
| 42254 | "\n" |
| 42255 | "clk_event_t __ovld create_user_event(void);\n" |
| 42256 | "\n" |
| 42257 | "void __ovld set_user_event_status(clk_event_t e, int state);\n" |
| 42258 | "\n" |
| 42259 | "bool __ovld is_valid_event (clk_event_t event);\n" |
| 42260 | "\n" |
| 42261 | "void __ovld capture_event_profiling_info(clk_event_t, clk_profiling_info, __global void* value);\n" |
| 42262 | "\n" |
| 42263 | "queue_t __ovld get_default_queue(void);\n" |
| 42264 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 42265 | "\n" |
| 42266 | "// OpenCL Extension v2.0 s9.17 - Sub-groups\n" |
| 42267 | "\n" |
| 42268 | "#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)\n" |
| 42269 | "// Shared Sub Group Functions\n" |
| 42270 | "uint __ovld get_sub_group_size(void);\n" |
| 42271 | "uint __ovld get_max_sub_group_size(void);\n" |
| 42272 | "uint __ovld get_num_sub_groups(void);\n" |
| 42273 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 42274 | "uint __ovld get_enqueued_num_sub_groups(void);\n" |
| 42275 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 42276 | "uint __ovld get_sub_group_id(void);\n" |
| 42277 | "uint __ovld get_sub_group_local_id(void);\n" |
| 42278 | "\n" |
| 42279 | "void __ovld __conv sub_group_barrier(cl_mem_fence_flags flags);\n" |
| 42280 | "#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 42281 | "void __ovld __conv sub_group_barrier(cl_mem_fence_flags flags, memory_scope scope);\n" |
| 42282 | "#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0\n" |
| 42283 | "\n" |
| 42284 | "int __ovld __conv sub_group_all(int predicate);\n" |
| 42285 | "int __ovld __conv sub_group_any(int predicate);\n" |
| 42286 | "\n" |
| 42287 | "int __ovld __conv sub_group_broadcast(int x, uint sub_group_local_id);\n" |
| 42288 | "uint __ovld __conv sub_group_broadcast(uint x, uint sub_group_local_id);\n" |
| 42289 | "long __ovld __conv sub_group_broadcast(long x, uint sub_group_local_id);\n" |
| 42290 | "ulong __ovld __conv sub_group_broadcast(ulong x, uint sub_group_local_id);\n" |
| 42291 | "float __ovld __conv sub_group_broadcast(float x, uint sub_group_local_id);\n" |
| 42292 | "\n" |
| 42293 | "int __ovld __conv sub_group_reduce_add(int x);\n" |
| 42294 | "uint __ovld __conv sub_group_reduce_add(uint x);\n" |
| 42295 | "long __ovld __conv sub_group_reduce_add(long x);\n" |
| 42296 | "ulong __ovld __conv sub_group_reduce_add(ulong x);\n" |
| 42297 | "float __ovld __conv sub_group_reduce_add(float x);\n" |
| 42298 | "int __ovld __conv sub_group_reduce_min(int x);\n" |
| 42299 | "uint __ovld __conv sub_group_reduce_min(uint x);\n" |
| 42300 | "long __ovld __conv sub_group_reduce_min(long x);\n" |
| 42301 | "ulong __ovld __conv sub_group_reduce_min(ulong x);\n" |
| 42302 | "float __ovld __conv sub_group_reduce_min(float x);\n" |
| 42303 | "int __ovld __conv sub_group_reduce_max(int x);\n" |
| 42304 | "uint __ovld __conv sub_group_reduce_max(uint x);\n" |
| 42305 | "long __ovld __conv sub_group_reduce_max(long x);\n" |
| 42306 | "ulong __ovld __conv sub_group_reduce_max(ulong x);\n" |
| 42307 | "float __ovld __conv sub_group_reduce_max(float x);\n" |
| 42308 | "\n" |
| 42309 | "int __ovld __conv sub_group_scan_exclusive_add(int x);\n" |
| 42310 | "uint __ovld __conv sub_group_scan_exclusive_add(uint x);\n" |
| 42311 | "long __ovld __conv sub_group_scan_exclusive_add(long x);\n" |
| 42312 | "ulong __ovld __conv sub_group_scan_exclusive_add(ulong x);\n" |
| 42313 | "float __ovld __conv sub_group_scan_exclusive_add(float x);\n" |
| 42314 | "int __ovld __conv sub_group_scan_exclusive_min(int x);\n" |
| 42315 | "uint __ovld __conv sub_group_scan_exclusive_min(uint x);\n" |
| 42316 | "long __ovld __conv sub_group_scan_exclusive_min(long x);\n" |
| 42317 | "ulong __ovld __conv sub_group_scan_exclusive_min(ulong x);\n" |
| 42318 | "float __ovld __conv sub_group_scan_exclusive_min(float x);\n" |
| 42319 | "int __ovld __conv sub_group_scan_exclusive_max(int x);\n" |
| 42320 | "uint __ovld __conv sub_group_scan_exclusive_max(uint x);\n" |
| 42321 | "long __ovld __conv sub_group_scan_exclusive_max(long x);\n" |
| 42322 | "ulong __ovld __conv sub_group_scan_exclusive_max(ulong x);\n" |
| 42323 | "float __ovld __conv sub_group_scan_exclusive_max(float x);\n" |
| 42324 | "\n" |
| 42325 | "int __ovld __conv sub_group_scan_inclusive_add(int x);\n" |
| 42326 | "uint __ovld __conv sub_group_scan_inclusive_add(uint x);\n" |
| 42327 | "long __ovld __conv sub_group_scan_inclusive_add(long x);\n" |
| 42328 | "ulong __ovld __conv sub_group_scan_inclusive_add(ulong x);\n" |
| 42329 | "float __ovld __conv sub_group_scan_inclusive_add(float x);\n" |
| 42330 | "int __ovld __conv sub_group_scan_inclusive_min(int x);\n" |
| 42331 | "uint __ovld __conv sub_group_scan_inclusive_min(uint x);\n" |
| 42332 | "long __ovld __conv sub_group_scan_inclusive_min(long x);\n" |
| 42333 | "ulong __ovld __conv sub_group_scan_inclusive_min(ulong x);\n" |
| 42334 | "float __ovld __conv sub_group_scan_inclusive_min(float x);\n" |
| 42335 | "int __ovld __conv sub_group_scan_inclusive_max(int x);\n" |
| 42336 | "uint __ovld __conv sub_group_scan_inclusive_max(uint x);\n" |
| 42337 | "long __ovld __conv sub_group_scan_inclusive_max(long x);\n" |
| 42338 | "ulong __ovld __conv sub_group_scan_inclusive_max(ulong x);\n" |
| 42339 | "float __ovld __conv sub_group_scan_inclusive_max(float x);\n" |
| 42340 | "\n" |
| 42341 | "#ifdef cl_khr_fp16\n" |
| 42342 | "half __ovld __conv sub_group_broadcast(half x, uint sub_group_local_id);\n" |
| 42343 | "half __ovld __conv sub_group_reduce_add(half x);\n" |
| 42344 | "half __ovld __conv sub_group_reduce_min(half x);\n" |
| 42345 | "half __ovld __conv sub_group_reduce_max(half x);\n" |
| 42346 | "half __ovld __conv sub_group_scan_exclusive_add(half x);\n" |
| 42347 | "half __ovld __conv sub_group_scan_exclusive_min(half x);\n" |
| 42348 | "half __ovld __conv sub_group_scan_exclusive_max(half x);\n" |
| 42349 | "half __ovld __conv sub_group_scan_inclusive_add(half x);\n" |
| 42350 | "half __ovld __conv sub_group_scan_inclusive_min(half x);\n" |
| 42351 | "half __ovld __conv sub_group_scan_inclusive_max(half x);\n" |
| 42352 | "#endif //cl_khr_fp16\n" |
| 42353 | "\n" |
| 42354 | "#ifdef cl_khr_fp64\n" |
| 42355 | "double __ovld __conv sub_group_broadcast(double x, uint sub_group_local_id);\n" |
| 42356 | "double __ovld __conv sub_group_reduce_add(double x);\n" |
| 42357 | "double __ovld __conv sub_group_reduce_min(double x);\n" |
| 42358 | "double __ovld __conv sub_group_reduce_max(double x);\n" |
| 42359 | "double __ovld __conv sub_group_scan_exclusive_add(double x);\n" |
| 42360 | "double __ovld __conv sub_group_scan_exclusive_min(double x);\n" |
| 42361 | "double __ovld __conv sub_group_scan_exclusive_max(double x);\n" |
| 42362 | "double __ovld __conv sub_group_scan_inclusive_add(double x);\n" |
| 42363 | "double __ovld __conv sub_group_scan_inclusive_min(double x);\n" |
| 42364 | "double __ovld __conv sub_group_scan_inclusive_max(double x);\n" |
| 42365 | "#endif //cl_khr_fp64\n" |
| 42366 | "\n" |
| 42367 | "#endif //cl_khr_subgroups cl_intel_subgroups\n" |
| 42368 | "\n" |
| 42369 | "#if defined(cl_intel_subgroups)\n" |
| 42370 | "// Intel-Specific Sub Group Functions\n" |
| 42371 | "float __ovld __conv intel_sub_group_shuffle( float x, uint c );\n" |
| 42372 | "float2 __ovld __conv intel_sub_group_shuffle( float2 x, uint c );\n" |
| 42373 | "float3 __ovld __conv intel_sub_group_shuffle( float3 x, uint c );\n" |
| 42374 | "float4 __ovld __conv intel_sub_group_shuffle( float4 x, uint c );\n" |
| 42375 | "float8 __ovld __conv intel_sub_group_shuffle( float8 x, uint c );\n" |
| 42376 | "float16 __ovld __conv intel_sub_group_shuffle( float16 x, uint c );\n" |
| 42377 | "\n" |
| 42378 | "int __ovld __conv intel_sub_group_shuffle( int x, uint c );\n" |
| 42379 | "int2 __ovld __conv intel_sub_group_shuffle( int2 x, uint c );\n" |
| 42380 | "int3 __ovld __conv intel_sub_group_shuffle( int3 x, uint c );\n" |
| 42381 | "int4 __ovld __conv intel_sub_group_shuffle( int4 x, uint c );\n" |
| 42382 | "int8 __ovld __conv intel_sub_group_shuffle( int8 x, uint c );\n" |
| 42383 | "int16 __ovld __conv intel_sub_group_shuffle( int16 x, uint c );\n" |
| 42384 | "\n" |
| 42385 | "uint __ovld __conv intel_sub_group_shuffle( uint x, uint c );\n" |
| 42386 | "uint2 __ovld __conv intel_sub_group_shuffle( uint2 x, uint c );\n" |
| 42387 | "uint3 __ovld __conv intel_sub_group_shuffle( uint3 x, uint c );\n" |
| 42388 | "uint4 __ovld __conv intel_sub_group_shuffle( uint4 x, uint c );\n" |
| 42389 | "uint8 __ovld __conv intel_sub_group_shuffle( uint8 x, uint c );\n" |
| 42390 | "uint16 __ovld __conv intel_sub_group_shuffle( uint16 x, uint c );\n" |
| 42391 | "\n" |
| 42392 | "long __ovld __conv intel_sub_group_shuffle( long x, uint c );\n" |
| 42393 | "ulong __ovld __conv intel_sub_group_shuffle( ulong x, uint c );\n" |
| 42394 | "\n" |
| 42395 | "float __ovld __conv intel_sub_group_shuffle_down( float cur, float next, uint c );\n" |
| 42396 | "float2 __ovld __conv intel_sub_group_shuffle_down( float2 cur, float2 next, uint c );\n" |
| 42397 | "float3 __ovld __conv intel_sub_group_shuffle_down( float3 cur, float3 next, uint c );\n" |
| 42398 | "float4 __ovld __conv intel_sub_group_shuffle_down( float4 cur, float4 next, uint c );\n" |
| 42399 | "float8 __ovld __conv intel_sub_group_shuffle_down( float8 cur, float8 next, uint c );\n" |
| 42400 | "float16 __ovld __conv intel_sub_group_shuffle_down( float16 cur, float16 next, uint c );\n" |
| 42401 | "\n" |
| 42402 | "int __ovld __conv intel_sub_group_shuffle_down( int cur, int next, uint c );\n" |
| 42403 | "int2 __ovld __conv intel_sub_group_shuffle_down( int2 cur, int2 next, uint c );\n" |
| 42404 | "int3 __ovld __conv intel_sub_group_shuffle_down( int3 cur, int3 next, uint c );\n" |
| 42405 | "int4 __ovld __conv intel_sub_group_shuffle_down( int4 cur, int4 next, uint c );\n" |
| 42406 | "int8 __ovld __conv intel_sub_group_shuffle_down( int8 cur, int8 next, uint c );\n" |
| 42407 | "int16 __ovld __conv intel_sub_group_shuffle_down( int16 cur, int16 next, uint c );\n" |
| 42408 | "\n" |
| 42409 | "uint __ovld __conv intel_sub_group_shuffle_down( uint cur, uint next, uint c );\n" |
| 42410 | "uint2 __ovld __conv intel_sub_group_shuffle_down( uint2 cur, uint2 next, uint c );\n" |
| 42411 | "uint3 __ovld __conv intel_sub_group_shuffle_down( uint3 cur, uint3 next, uint c );\n" |
| 42412 | "uint4 __ovld __conv intel_sub_group_shuffle_down( uint4 cur, uint4 next, uint c );\n" |
| 42413 | "uint8 __ovld __conv intel_sub_group_shuffle_down( uint8 cur, uint8 next, uint c );\n" |
| 42414 | "uint16 __ovld __conv intel_sub_group_shuffle_down( uint16 cur, uint16 next, uint c );\n" |
| 42415 | "\n" |
| 42416 | "long __ovld __conv intel_sub_group_shuffle_down( long prev, long cur, uint c );\n" |
| 42417 | "ulong __ovld __conv intel_sub_group_shuffle_down( ulong prev, ulong cur, uint c );\n" |
| 42418 | "\n" |
| 42419 | "float __ovld __conv intel_sub_group_shuffle_up( float prev, float cur, uint c );\n" |
| 42420 | "float2 __ovld __conv intel_sub_group_shuffle_up( float2 prev, float2 cur, uint c );\n" |
| 42421 | "float3 __ovld __conv intel_sub_group_shuffle_up( float3 prev, float3 cur, uint c );\n" |
| 42422 | "float4 __ovld __conv intel_sub_group_shuffle_up( float4 prev, float4 cur, uint c );\n" |
| 42423 | "float8 __ovld __conv intel_sub_group_shuffle_up( float8 prev, float8 cur, uint c );\n" |
| 42424 | "float16 __ovld __conv intel_sub_group_shuffle_up( float16 prev, float16 cur, uint c );\n" |
| 42425 | "\n" |
| 42426 | "int __ovld __conv intel_sub_group_shuffle_up( int prev, int cur, uint c );\n" |
| 42427 | "int2 __ovld __conv intel_sub_group_shuffle_up( int2 prev, int2 cur, uint c );\n" |
| 42428 | "int3 __ovld __conv intel_sub_group_shuffle_up( int3 prev, int3 cur, uint c );\n" |
| 42429 | "int4 __ovld __conv intel_sub_group_shuffle_up( int4 prev, int4 cur, uint c );\n" |
| 42430 | "int8 __ovld __conv intel_sub_group_shuffle_up( int8 prev, int8 cur, uint c );\n" |
| 42431 | "int16 __ovld __conv intel_sub_group_shuffle_up( int16 prev, int16 cur, uint c );\n" |
| 42432 | "\n" |
| 42433 | "uint __ovld __conv intel_sub_group_shuffle_up( uint prev, uint cur, uint c );\n" |
| 42434 | "uint2 __ovld __conv intel_sub_group_shuffle_up( uint2 prev, uint2 cur, uint c );\n" |
| 42435 | "uint3 __ovld __conv intel_sub_group_shuffle_up( uint3 prev, uint3 cur, uint c );\n" |
| 42436 | "uint4 __ovld __conv intel_sub_group_shuffle_up( uint4 prev, uint4 cur, uint c );\n" |
| 42437 | "uint8 __ovld __conv intel_sub_group_shuffle_up( uint8 prev, uint8 cur, uint c );\n" |
| 42438 | "uint16 __ovld __conv intel_sub_group_shuffle_up( uint16 prev, uint16 cur, uint c );\n" |
| 42439 | "\n" |
| 42440 | "long __ovld __conv intel_sub_group_shuffle_up( long prev, long cur, uint c );\n" |
| 42441 | "ulong __ovld __conv intel_sub_group_shuffle_up( ulong prev, ulong cur, uint c );\n" |
| 42442 | "\n" |
| 42443 | "float __ovld __conv intel_sub_group_shuffle_xor( float x, uint c );\n" |
| 42444 | "float2 __ovld __conv intel_sub_group_shuffle_xor( float2 x, uint c );\n" |
| 42445 | "float3 __ovld __conv intel_sub_group_shuffle_xor( float3 x, uint c );\n" |
| 42446 | "float4 __ovld __conv intel_sub_group_shuffle_xor( float4 x, uint c );\n" |
| 42447 | "float8 __ovld __conv intel_sub_group_shuffle_xor( float8 x, uint c );\n" |
| 42448 | "float16 __ovld __conv intel_sub_group_shuffle_xor( float16 x, uint c );\n" |
| 42449 | "\n" |
| 42450 | "int __ovld __conv intel_sub_group_shuffle_xor( int x, uint c );\n" |
| 42451 | "int2 __ovld __conv intel_sub_group_shuffle_xor( int2 x, uint c );\n" |
| 42452 | "int3 __ovld __conv intel_sub_group_shuffle_xor( int3 x, uint c );\n" |
| 42453 | "int4 __ovld __conv intel_sub_group_shuffle_xor( int4 x, uint c );\n" |
| 42454 | "int8 __ovld __conv intel_sub_group_shuffle_xor( int8 x, uint c );\n" |
| 42455 | "int16 __ovld __conv intel_sub_group_shuffle_xor( int16 x, uint c );\n" |
| 42456 | "\n" |
| 42457 | "uint __ovld __conv intel_sub_group_shuffle_xor( uint x, uint c );\n" |
| 42458 | "uint2 __ovld __conv intel_sub_group_shuffle_xor( uint2 x, uint c );\n" |
| 42459 | "uint3 __ovld __conv intel_sub_group_shuffle_xor( uint3 x, uint c );\n" |
| 42460 | "uint4 __ovld __conv intel_sub_group_shuffle_xor( uint4 x, uint c );\n" |
| 42461 | "uint8 __ovld __conv intel_sub_group_shuffle_xor( uint8 x, uint c );\n" |
| 42462 | "uint16 __ovld __conv intel_sub_group_shuffle_xor( uint16 x, uint c );\n" |
| 42463 | "\n" |
| 42464 | "long __ovld __conv intel_sub_group_shuffle_xor( long x, uint c );\n" |
| 42465 | "ulong __ovld __conv intel_sub_group_shuffle_xor( ulong x, uint c );\n" |
| 42466 | "\n" |
| 42467 | "uint __ovld __conv intel_sub_group_block_read( read_only image2d_t image, int2 coord );\n" |
| 42468 | "uint2 __ovld __conv intel_sub_group_block_read2( read_only image2d_t image, int2 coord );\n" |
| 42469 | "uint4 __ovld __conv intel_sub_group_block_read4( read_only image2d_t image, int2 coord );\n" |
| 42470 | "uint8 __ovld __conv intel_sub_group_block_read8( read_only image2d_t image, int2 coord );\n" |
| 42471 | "\n" |
| 42472 | "#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n" |
| 42473 | "uint __ovld __conv intel_sub_group_block_read(read_write image2d_t image, int2 coord);\n" |
| 42474 | "uint2 __ovld __conv intel_sub_group_block_read2(read_write image2d_t image, int2 coord);\n" |
| 42475 | "uint4 __ovld __conv intel_sub_group_block_read4(read_write image2d_t image, int2 coord);\n" |
| 42476 | "uint8 __ovld __conv intel_sub_group_block_read8(read_write image2d_t image, int2 coord);\n" |
| 42477 | "#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n" |
| 42478 | "\n" |
| 42479 | "uint __ovld __conv intel_sub_group_block_read( const __global uint* p );\n" |
| 42480 | "uint2 __ovld __conv intel_sub_group_block_read2( const __global uint* p );\n" |
| 42481 | "uint4 __ovld __conv intel_sub_group_block_read4( const __global uint* p );\n" |
| 42482 | "uint8 __ovld __conv intel_sub_group_block_read8( const __global uint* p );\n" |
| 42483 | "\n" |
| 42484 | "void __ovld __conv intel_sub_group_block_write(write_only image2d_t image, int2 coord, uint data);\n" |
| 42485 | "void __ovld __conv intel_sub_group_block_write2(write_only image2d_t image, int2 coord, uint2 data);\n" |
| 42486 | "void __ovld __conv intel_sub_group_block_write4(write_only image2d_t image, int2 coord, uint4 data);\n" |
| 42487 | "void __ovld __conv intel_sub_group_block_write8(write_only image2d_t image, int2 coord, uint8 data);\n" |
| 42488 | "\n" |
| 42489 | "#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n" |
| 42490 | "void __ovld __conv intel_sub_group_block_write(read_write image2d_t image, int2 coord, uint data);\n" |
| 42491 | "void __ovld __conv intel_sub_group_block_write2(read_write image2d_t image, int2 coord, uint2 data);\n" |
| 42492 | "void __ovld __conv intel_sub_group_block_write4(read_write image2d_t image, int2 coord, uint4 data);\n" |
| 42493 | "void __ovld __conv intel_sub_group_block_write8(read_write image2d_t image, int2 coord, uint8 data);\n" |
| 42494 | "#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n" |
| 42495 | "\n" |
| 42496 | "void __ovld __conv intel_sub_group_block_write( __global uint* p, uint data );\n" |
| 42497 | "void __ovld __conv intel_sub_group_block_write2( __global uint* p, uint2 data );\n" |
| 42498 | "void __ovld __conv intel_sub_group_block_write4( __global uint* p, uint4 data );\n" |
| 42499 | "void __ovld __conv intel_sub_group_block_write8( __global uint* p, uint8 data );\n" |
| 42500 | "\n" |
| 42501 | "#ifdef cl_khr_fp16\n" |
| 42502 | "half __ovld __conv intel_sub_group_shuffle( half x, uint c );\n" |
| 42503 | "half __ovld __conv intel_sub_group_shuffle_down( half prev, half cur, uint c );\n" |
| 42504 | "half __ovld __conv intel_sub_group_shuffle_up( half prev, half cur, uint c );\n" |
| 42505 | "half __ovld __conv intel_sub_group_shuffle_xor( half x, uint c );\n" |
| 42506 | "#endif\n" |
| 42507 | "\n" |
| 42508 | "#if defined(cl_khr_fp64)\n" |
| 42509 | "double __ovld __conv intel_sub_group_shuffle( double x, uint c );\n" |
| 42510 | "double __ovld __conv intel_sub_group_shuffle_down( double prev, double cur, uint c );\n" |
| 42511 | "double __ovld __conv intel_sub_group_shuffle_up( double prev, double cur, uint c );\n" |
| 42512 | "double __ovld __conv intel_sub_group_shuffle_xor( double x, uint c );\n" |
| 42513 | "#endif\n" |
| 42514 | "\n" |
| 42515 | "#endif //cl_intel_subgroups\n" |
| 42516 | "\n" |
| 42517 | "#if defined(cl_intel_subgroups_short)\n" |
| 42518 | "short __ovld __conv intel_sub_group_broadcast( short x, uint sub_group_local_id );\n" |
| 42519 | "short2 __ovld __conv intel_sub_group_broadcast( short2 x, uint sub_group_local_id );\n" |
| 42520 | "short3 __ovld __conv intel_sub_group_broadcast( short3 x, uint sub_group_local_id );\n" |
| 42521 | "short4 __ovld __conv intel_sub_group_broadcast( short4 x, uint sub_group_local_id );\n" |
| 42522 | "short8 __ovld __conv intel_sub_group_broadcast( short8 x, uint sub_group_local_id );\n" |
| 42523 | "\n" |
| 42524 | "ushort __ovld __conv intel_sub_group_broadcast( ushort x, uint sub_group_local_id );\n" |
| 42525 | "ushort2 __ovld __conv intel_sub_group_broadcast( ushort2 x, uint sub_group_local_id );\n" |
| 42526 | "ushort3 __ovld __conv intel_sub_group_broadcast( ushort3 x, uint sub_group_local_id );\n" |
| 42527 | "ushort4 __ovld __conv intel_sub_group_broadcast( ushort4 x, uint sub_group_local_id );\n" |
| 42528 | "ushort8 __ovld __conv intel_sub_group_broadcast( ushort8 x, uint sub_group_local_id );\n" |
| 42529 | "\n" |
| 42530 | "short __ovld __conv intel_sub_group_shuffle( short x, uint c );\n" |
| 42531 | "short2 __ovld __conv intel_sub_group_shuffle( short2 x, uint c );\n" |
| 42532 | "short3 __ovld __conv intel_sub_group_shuffle( short3 x, uint c );\n" |
| 42533 | "short4 __ovld __conv intel_sub_group_shuffle( short4 x, uint c );\n" |
| 42534 | "short8 __ovld __conv intel_sub_group_shuffle( short8 x, uint c );\n" |
| 42535 | "short16 __ovld __conv intel_sub_group_shuffle( short16 x, uint c);\n" |
| 42536 | "\n" |
| 42537 | "ushort __ovld __conv intel_sub_group_shuffle( ushort x, uint c );\n" |
| 42538 | "ushort2 __ovld __conv intel_sub_group_shuffle( ushort2 x, uint c );\n" |
| 42539 | "ushort3 __ovld __conv intel_sub_group_shuffle( ushort3 x, uint c );\n" |
| 42540 | "ushort4 __ovld __conv intel_sub_group_shuffle( ushort4 x, uint c );\n" |
| 42541 | "ushort8 __ovld __conv intel_sub_group_shuffle( ushort8 x, uint c );\n" |
| 42542 | "ushort16 __ovld __conv intel_sub_group_shuffle( ushort16 x, uint c );\n" |
| 42543 | "\n" |
| 42544 | "short __ovld __conv intel_sub_group_shuffle_down( short cur, short next, uint c );\n" |
| 42545 | "short2 __ovld __conv intel_sub_group_shuffle_down( short2 cur, short2 next, uint c );\n" |
| 42546 | "short3 __ovld __conv intel_sub_group_shuffle_down( short3 cur, short3 next, uint c );\n" |
| 42547 | "short4 __ovld __conv intel_sub_group_shuffle_down( short4 cur, short4 next, uint c );\n" |
| 42548 | "short8 __ovld __conv intel_sub_group_shuffle_down( short8 cur, short8 next, uint c );\n" |
| 42549 | "short16 __ovld __conv intel_sub_group_shuffle_down( short16 cur, short16 next, uint c );\n" |
| 42550 | "\n" |
| 42551 | "ushort __ovld __conv intel_sub_group_shuffle_down( ushort cur, ushort next, uint c );\n" |
| 42552 | "ushort2 __ovld __conv intel_sub_group_shuffle_down( ushort2 cur, ushort2 next, uint c );\n" |
| 42553 | "ushort3 __ovld __conv intel_sub_group_shuffle_down( ushort3 cur, ushort3 next, uint c );\n" |
| 42554 | "ushort4 __ovld __conv intel_sub_group_shuffle_down( ushort4 cur, ushort4 next, uint c );\n" |
| 42555 | "ushort8 __ovld __conv intel_sub_group_shuffle_down( ushort8 cur, ushort8 next, uint c );\n" |
| 42556 | "ushort16 __ovld __conv intel_sub_group_shuffle_down( ushort16 cur, ushort16 next, uint c );\n" |
| 42557 | "\n" |
| 42558 | "short __ovld __conv intel_sub_group_shuffle_up( short cur, short next, uint c );\n" |
| 42559 | "short2 __ovld __conv intel_sub_group_shuffle_up( short2 cur, short2 next, uint c );\n" |
| 42560 | "short3 __ovld __conv intel_sub_group_shuffle_up( short3 cur, short3 next, uint c );\n" |
| 42561 | "short4 __ovld __conv intel_sub_group_shuffle_up( short4 cur, short4 next, uint c );\n" |
| 42562 | "short8 __ovld __conv intel_sub_group_shuffle_up( short8 cur, short8 next, uint c );\n" |
| 42563 | "short16 __ovld __conv intel_sub_group_shuffle_up( short16 cur, short16 next, uint c );\n" |
| 42564 | "\n" |
| 42565 | "ushort __ovld __conv intel_sub_group_shuffle_up( ushort cur, ushort next, uint c );\n" |
| 42566 | "ushort2 __ovld __conv intel_sub_group_shuffle_up( ushort2 cur, ushort2 next, uint c );\n" |
| 42567 | "ushort3 __ovld __conv intel_sub_group_shuffle_up( ushort3 cur, ushort3 next, uint c );\n" |
| 42568 | "ushort4 __ovld __conv intel_sub_group_shuffle_up( ushort4 cur, ushort4 next, uint c );\n" |
| 42569 | "ushort8 __ovld __conv intel_sub_group_shuffle_up( ushort8 cur, ushort8 next, uint c );\n" |
| 42570 | "ushort16 __ovld __conv intel_sub_group_shuffle_up( ushort16 cur, ushort16 next, uint c );\n" |
| 42571 | "\n" |
| 42572 | "short __ovld __conv intel_sub_group_shuffle_xor( short x, uint c );\n" |
| 42573 | "short2 __ovld __conv intel_sub_group_shuffle_xor( short2 x, uint c );\n" |
| 42574 | "short3 __ovld __conv intel_sub_group_shuffle_xor( short3 x, uint c );\n" |
| 42575 | "short4 __ovld __conv intel_sub_group_shuffle_xor( short4 x, uint c );\n" |
| 42576 | "short8 __ovld __conv intel_sub_group_shuffle_xor( short8 x, uint c );\n" |
| 42577 | "short16 __ovld __conv intel_sub_group_shuffle_xor( short16 x, uint c );\n" |
| 42578 | "\n" |
| 42579 | "ushort __ovld __conv intel_sub_group_shuffle_xor( ushort x, uint c );\n" |
| 42580 | "ushort2 __ovld __conv intel_sub_group_shuffle_xor( ushort2 x, uint c );\n" |
| 42581 | "ushort3 __ovld __conv intel_sub_group_shuffle_xor( ushort3 x, uint c );\n" |
| 42582 | "ushort4 __ovld __conv intel_sub_group_shuffle_xor( ushort4 x, uint c );\n" |
| 42583 | "ushort8 __ovld __conv intel_sub_group_shuffle_xor( ushort8 x, uint c );\n" |
| 42584 | "ushort16 __ovld __conv intel_sub_group_shuffle_xor( ushort16 x, uint c );\n" |
| 42585 | "\n" |
| 42586 | "short __ovld __conv intel_sub_group_reduce_add( short x );\n" |
| 42587 | "ushort __ovld __conv intel_sub_group_reduce_add( ushort x );\n" |
| 42588 | "short __ovld __conv intel_sub_group_reduce_min( short x );\n" |
| 42589 | "ushort __ovld __conv intel_sub_group_reduce_min( ushort x );\n" |
| 42590 | "short __ovld __conv intel_sub_group_reduce_max( short x );\n" |
| 42591 | "ushort __ovld __conv intel_sub_group_reduce_max( ushort x );\n" |
| 42592 | "\n" |
| 42593 | "short __ovld __conv intel_sub_group_scan_exclusive_add( short x );\n" |
| 42594 | "ushort __ovld __conv intel_sub_group_scan_exclusive_add( ushort x );\n" |
| 42595 | "short __ovld __conv intel_sub_group_scan_exclusive_min( short x );\n" |
| 42596 | "ushort __ovld __conv intel_sub_group_scan_exclusive_min( ushort x );\n" |
| 42597 | "short __ovld __conv intel_sub_group_scan_exclusive_max( short x );\n" |
| 42598 | "ushort __ovld __conv intel_sub_group_scan_exclusive_max( ushort x );\n" |
| 42599 | "\n" |
| 42600 | "short __ovld __conv intel_sub_group_scan_inclusive_add( short x );\n" |
| 42601 | "ushort __ovld __conv intel_sub_group_scan_inclusive_add( ushort x );\n" |
| 42602 | "short __ovld __conv intel_sub_group_scan_inclusive_min( short x );\n" |
| 42603 | "ushort __ovld __conv intel_sub_group_scan_inclusive_min( ushort x );\n" |
| 42604 | "short __ovld __conv intel_sub_group_scan_inclusive_max( short x );\n" |
| 42605 | "ushort __ovld __conv intel_sub_group_scan_inclusive_max( ushort x );\n" |
| 42606 | "\n" |
| 42607 | "uint __ovld __conv intel_sub_group_block_read_ui( read_only image2d_t image, int2 byte_coord );\n" |
| 42608 | "uint2 __ovld __conv intel_sub_group_block_read_ui2( read_only image2d_t image, int2 byte_coord );\n" |
| 42609 | "uint4 __ovld __conv intel_sub_group_block_read_ui4( read_only image2d_t image, int2 byte_coord );\n" |
| 42610 | "uint8 __ovld __conv intel_sub_group_block_read_ui8( read_only image2d_t image, int2 byte_coord );\n" |
| 42611 | "\n" |
| 42612 | "#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n" |
| 42613 | "uint __ovld __conv intel_sub_group_block_read_ui( read_write image2d_t image, int2 byte_coord );\n" |
| 42614 | "uint2 __ovld __conv intel_sub_group_block_read_ui2( read_write image2d_t image, int2 byte_coord );\n" |
| 42615 | "uint4 __ovld __conv intel_sub_group_block_read_ui4( read_write image2d_t image, int2 byte_coord );\n" |
| 42616 | "uint8 __ovld __conv intel_sub_group_block_read_ui8( read_write image2d_t image, int2 byte_coord );\n" |
| 42617 | "#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n" |
| 42618 | "\n" |
| 42619 | "uint __ovld __conv intel_sub_group_block_read_ui( const __global uint* p );\n" |
| 42620 | "uint2 __ovld __conv intel_sub_group_block_read_ui2( const __global uint* p );\n" |
| 42621 | "uint4 __ovld __conv intel_sub_group_block_read_ui4( const __global uint* p );\n" |
| 42622 | "uint8 __ovld __conv intel_sub_group_block_read_ui8( const __global uint* p );\n" |
| 42623 | "\n" |
| 42624 | "void __ovld __conv intel_sub_group_block_write_ui( read_only image2d_t image, int2 byte_coord, uint data );\n" |
| 42625 | "void __ovld __conv intel_sub_group_block_write_ui2( read_only image2d_t image, int2 byte_coord, uint2 data );\n" |
| 42626 | "void __ovld __conv intel_sub_group_block_write_ui4( read_only image2d_t image, int2 byte_coord, uint4 data );\n" |
| 42627 | "void __ovld __conv intel_sub_group_block_write_ui8( read_only image2d_t image, int2 byte_coord, uint8 data );\n" |
| 42628 | "\n" |
| 42629 | "#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n" |
| 42630 | "void __ovld __conv intel_sub_group_block_write_ui( read_write image2d_t image, int2 byte_coord, uint data );\n" |
| 42631 | "void __ovld __conv intel_sub_group_block_write_ui2( read_write image2d_t image, int2 byte_coord, uint2 data );\n" |
| 42632 | "void __ovld __conv intel_sub_group_block_write_ui4( read_write image2d_t image, int2 byte_coord, uint4 data );\n" |
| 42633 | "void __ovld __conv intel_sub_group_block_write_ui8( read_write image2d_t image, int2 byte_coord, uint8 data );\n" |
| 42634 | "#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n" |
| 42635 | "\n" |
| 42636 | "void __ovld __conv intel_sub_group_block_write_ui( __global uint* p, uint data );\n" |
| 42637 | "void __ovld __conv intel_sub_group_block_write_ui2( __global uint* p, uint2 data );\n" |
| 42638 | "void __ovld __conv intel_sub_group_block_write_ui4( __global uint* p, uint4 data );\n" |
| 42639 | "void __ovld __conv intel_sub_group_block_write_ui8( __global uint* p, uint8 data );\n" |
| 42640 | "\n" |
| 42641 | "ushort __ovld __conv intel_sub_group_block_read_us( read_only image2d_t image, int2 coord );\n" |
| 42642 | "ushort2 __ovld __conv intel_sub_group_block_read_us2( read_only image2d_t image, int2 coord );\n" |
| 42643 | "ushort4 __ovld __conv intel_sub_group_block_read_us4( read_only image2d_t image, int2 coord );\n" |
| 42644 | "ushort8 __ovld __conv intel_sub_group_block_read_us8( read_only image2d_t image, int2 coord );\n" |
| 42645 | "\n" |
| 42646 | "#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n" |
| 42647 | "ushort __ovld __conv intel_sub_group_block_read_us(read_write image2d_t image, int2 coord);\n" |
| 42648 | "ushort2 __ovld __conv intel_sub_group_block_read_us2(read_write image2d_t image, int2 coord);\n" |
| 42649 | "ushort4 __ovld __conv intel_sub_group_block_read_us4(read_write image2d_t image, int2 coord);\n" |
| 42650 | "ushort8 __ovld __conv intel_sub_group_block_read_us8(read_write image2d_t image, int2 coord);\n" |
| 42651 | "#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n" |
| 42652 | "\n" |
| 42653 | "ushort __ovld __conv intel_sub_group_block_read_us( const __global ushort* p );\n" |
| 42654 | "ushort2 __ovld __conv intel_sub_group_block_read_us2( const __global ushort* p );\n" |
| 42655 | "ushort4 __ovld __conv intel_sub_group_block_read_us4( const __global ushort* p );\n" |
| 42656 | "ushort8 __ovld __conv intel_sub_group_block_read_us8( const __global ushort* p );\n" |
| 42657 | "\n" |
| 42658 | "void __ovld __conv intel_sub_group_block_write_us(write_only image2d_t image, int2 coord, ushort data);\n" |
| 42659 | "void __ovld __conv intel_sub_group_block_write_us2(write_only image2d_t image, int2 coord, ushort2 data);\n" |
| 42660 | "void __ovld __conv intel_sub_group_block_write_us4(write_only image2d_t image, int2 coord, ushort4 data);\n" |
| 42661 | "void __ovld __conv intel_sub_group_block_write_us8(write_only image2d_t image, int2 coord, ushort8 data);\n" |
| 42662 | "\n" |
| 42663 | "#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n" |
| 42664 | "void __ovld __conv intel_sub_group_block_write_us(read_write image2d_t image, int2 coord, ushort data);\n" |
| 42665 | "void __ovld __conv intel_sub_group_block_write_us2(read_write image2d_t image, int2 coord, ushort2 data);\n" |
| 42666 | "void __ovld __conv intel_sub_group_block_write_us4(read_write image2d_t image, int2 coord, ushort4 data);\n" |
| 42667 | "void __ovld __conv intel_sub_group_block_write_us8(read_write image2d_t image, int2 coord, ushort8 data);\n" |
| 42668 | "#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)\n" |
| 42669 | "\n" |
| 42670 | "void __ovld __conv intel_sub_group_block_write_us( __global ushort* p, ushort data );\n" |
| 42671 | "void __ovld __conv intel_sub_group_block_write_us2( __global ushort* p, ushort2 data );\n" |
| 42672 | "void __ovld __conv intel_sub_group_block_write_us4( __global ushort* p, ushort4 data );\n" |
| 42673 | "void __ovld __conv intel_sub_group_block_write_us8( __global ushort* p, ushort8 data );\n" |
| 42674 | "#endif // cl_intel_subgroups_short\n" |
| 42675 | "\n" |
| 42676 | "#ifdef cl_amd_media_ops\n" |
| 42677 | "uint __ovld amd_bitalign(uint a, uint b, uint c);\n" |
| 42678 | "uint2 __ovld amd_bitalign(uint2 a, uint2 b, uint2 c);\n" |
| 42679 | "uint3 __ovld amd_bitalign(uint3 a, uint3 b, uint3 c);\n" |
| 42680 | "uint4 __ovld amd_bitalign(uint4 a, uint4 b, uint4 c);\n" |
| 42681 | "uint8 __ovld amd_bitalign(uint8 a, uint8 b, uint8 c);\n" |
| 42682 | "uint16 __ovld amd_bitalign(uint16 a, uint16 b, uint16 c);\n" |
| 42683 | "\n" |
| 42684 | "uint __ovld amd_bytealign(uint a, uint b, uint c);\n" |
| 42685 | "uint2 __ovld amd_bytealign(uint2 a, uint2 b, uint2 c);\n" |
| 42686 | "uint3 __ovld amd_bytealign(uint3 a, uint3 b, uint3 c);\n" |
| 42687 | "uint4 __ovld amd_bytealign(uint4 a, uint4 b, uint4 c);\n" |
| 42688 | "uint8 __ovld amd_bytealign(uint8 a, uint8 b, uint8 c);\n" |
| 42689 | "uint16 __ovld amd_bytealign(uint16 a, uint16 b, uint16 c);\n" |
| 42690 | "\n" |
| 42691 | "uint __ovld amd_lerp(uint a, uint b, uint c);\n" |
| 42692 | "uint2 __ovld amd_lerp(uint2 a, uint2 b, uint2 c);\n" |
| 42693 | "uint3 __ovld amd_lerp(uint3 a, uint3 b, uint3 c);\n" |
| 42694 | "uint4 __ovld amd_lerp(uint4 a, uint4 b, uint4 c);\n" |
| 42695 | "uint8 __ovld amd_lerp(uint8 a, uint8 b, uint8 c);\n" |
| 42696 | "uint16 __ovld amd_lerp(uint16 a, uint16 b, uint16 c);\n" |
| 42697 | "\n" |
| 42698 | "uint __ovld amd_pack(float4 v);\n" |
| 42699 | "\n" |
| 42700 | "uint __ovld amd_sad4(uint4 x, uint4 y, uint z);\n" |
| 42701 | "\n" |
| 42702 | "uint __ovld amd_sadhi(uint a, uint b, uint c);\n" |
| 42703 | "uint2 __ovld amd_sadhi(uint2 a, uint2 b, uint2 c);\n" |
| 42704 | "uint3 __ovld amd_sadhi(uint3 a, uint3 b, uint3 c);\n" |
| 42705 | "uint4 __ovld amd_sadhi(uint4 a, uint4 b, uint4 c);\n" |
| 42706 | "uint8 __ovld amd_sadhi(uint8 a, uint8 b, uint8 c);\n" |
| 42707 | "uint16 __ovld amd_sadhi(uint16 a, uint16 b, uint16 c);\n" |
| 42708 | "\n" |
| 42709 | "uint __ovld amd_sad(uint a, uint b, uint c);\n" |
| 42710 | "uint2 __ovld amd_sad(uint2 a, uint2 b, uint2 c);\n" |
| 42711 | "uint3 __ovld amd_sad(uint3 a, uint3 b, uint3 c);\n" |
| 42712 | "uint4 __ovld amd_sad(uint4 a, uint4 b, uint4 c);\n" |
| 42713 | "uint8 __ovld amd_sad(uint8 a, uint8 b, uint8 c);\n" |
| 42714 | "uint16 __ovld amd_sad(uint16 a, uint16 b, uint16 c);\n" |
| 42715 | "\n" |
| 42716 | "float __ovld amd_unpack0(uint a);\n" |
| 42717 | "float2 __ovld amd_unpack0(uint2 a);\n" |
| 42718 | "float3 __ovld amd_unpack0(uint3 a);\n" |
| 42719 | "float4 __ovld amd_unpack0(uint4 a);\n" |
| 42720 | "float8 __ovld amd_unpack0(uint8 a);\n" |
| 42721 | "float16 __ovld amd_unpack0(uint16 a);\n" |
| 42722 | "\n" |
| 42723 | "float __ovld amd_unpack1(uint a);\n" |
| 42724 | "float2 __ovld amd_unpack1(uint2 a);\n" |
| 42725 | "float3 __ovld amd_unpack1(uint3 a);\n" |
| 42726 | "float4 __ovld amd_unpack1(uint4 a);\n" |
| 42727 | "float8 __ovld amd_unpack1(uint8 a);\n" |
| 42728 | "float16 __ovld amd_unpack1(uint16 a);\n" |
| 42729 | "\n" |
| 42730 | "float __ovld amd_unpack2(uint a);\n" |
| 42731 | "float2 __ovld amd_unpack2(uint2 a);\n" |
| 42732 | "float3 __ovld amd_unpack2(uint3 a);\n" |
| 42733 | "float4 __ovld amd_unpack2(uint4 a);\n" |
| 42734 | "float8 __ovld amd_unpack2(uint8 a);\n" |
| 42735 | "float16 __ovld amd_unpack2(uint16 a);\n" |
| 42736 | "\n" |
| 42737 | "float __ovld amd_unpack3(uint a);\n" |
| 42738 | "float2 __ovld amd_unpack3(uint2 a);\n" |
| 42739 | "float3 __ovld amd_unpack3(uint3 a);\n" |
| 42740 | "float4 __ovld amd_unpack3(uint4 a);\n" |
| 42741 | "float8 __ovld amd_unpack3(uint8 a);\n" |
| 42742 | "float16 __ovld amd_unpack3(uint16 a);\n" |
| 42743 | "#endif // cl_amd_media_ops\n" |
| 42744 | "\n" |
| 42745 | "#ifdef cl_amd_media_ops2\n" |
| 42746 | "int __ovld amd_bfe(int src0, uint src1, uint src2);\n" |
| 42747 | "int2 __ovld amd_bfe(int2 src0, uint2 src1, uint2 src2);\n" |
| 42748 | "int3 __ovld amd_bfe(int3 src0, uint3 src1, uint3 src2);\n" |
| 42749 | "int4 __ovld amd_bfe(int4 src0, uint4 src1, uint4 src2);\n" |
| 42750 | "int8 __ovld amd_bfe(int8 src0, uint8 src1, uint8 src2);\n" |
| 42751 | "int16 __ovld amd_bfe(int16 src0, uint16 src1, uint16 src2);\n" |
| 42752 | "\n" |
| 42753 | "uint __ovld amd_bfe(uint src0, uint src1, uint src2);\n" |
| 42754 | "uint2 __ovld amd_bfe(uint2 src0, uint2 src1, uint2 src2);\n" |
| 42755 | "uint3 __ovld amd_bfe(uint3 src0, uint3 src1, uint3 src2);\n" |
| 42756 | "uint4 __ovld amd_bfe(uint4 src0, uint4 src1, uint4 src2);\n" |
| 42757 | "uint8 __ovld amd_bfe(uint8 src0, uint8 src1, uint8 src2);\n" |
| 42758 | "uint16 __ovld amd_bfe(uint16 src0, uint16 src1, uint16 src2);\n" |
| 42759 | "\n" |
| 42760 | "uint __ovld amd_bfm(uint src0, uint src1);\n" |
| 42761 | "uint2 __ovld amd_bfm(uint2 src0, uint2 src1);\n" |
| 42762 | "uint3 __ovld amd_bfm(uint3 src0, uint3 src1);\n" |
| 42763 | "uint4 __ovld amd_bfm(uint4 src0, uint4 src1);\n" |
| 42764 | "uint8 __ovld amd_bfm(uint8 src0, uint8 src1);\n" |
| 42765 | "uint16 __ovld amd_bfm(uint16 src0, uint16 src1);\n" |
| 42766 | "\n" |
| 42767 | "float __ovld amd_max3(float src0, float src1, float src2);\n" |
| 42768 | "float2 __ovld amd_max3(float2 src0, float2 src1, float2 src2);\n" |
| 42769 | "float3 __ovld amd_max3(float3 src0, float3 src1, float3 src2);\n" |
| 42770 | "float4 __ovld amd_max3(float4 src0, float4 src1, float4 src2);\n" |
| 42771 | "float8 __ovld amd_max3(float8 src0, float8 src1, float8 src2);\n" |
| 42772 | "float16 __ovld amd_max3(float16 src0, float16 src1, float16 src2);\n" |
| 42773 | "\n" |
| 42774 | "int __ovld amd_max3(int src0, int src1, int src2);\n" |
| 42775 | "int2 __ovld amd_max3(int2 src0, int2 src1, int2 src2);\n" |
| 42776 | "int3 __ovld amd_max3(int3 src0, int3 src1, int3 src2);\n" |
| 42777 | "int4 __ovld amd_max3(int4 src0, int4 src1, int4 src2);\n" |
| 42778 | "int8 __ovld amd_max3(int8 src0, int8 src1, int8 src2);\n" |
| 42779 | "int16 __ovld amd_max3(int16 src0, int16 src1, int16 src2);\n" |
| 42780 | "\n" |
| 42781 | "uint __ovld amd_max3(uint src0, uint src1, uint src2);\n" |
| 42782 | "uint2 __ovld amd_max3(uint2 src0, uint2 src1, uint2 src2);\n" |
| 42783 | "uint3 __ovld amd_max3(uint3 src0, uint3 src1, uint3 src2);\n" |
| 42784 | "uint4 __ovld amd_max3(uint4 src0, uint4 src1, uint4 src2);\n" |
| 42785 | "uint8 __ovld amd_max3(uint8 src0, uint8 src1, uint8 src2);\n" |
| 42786 | "uint16 __ovld amd_max3(uint16 src0, uint16 src1, uint16 src2);\n" |
| 42787 | "\n" |
| 42788 | "float __ovld amd_median3(float src0, float src1, float src2);\n" |
| 42789 | "float2 __ovld amd_median3(float2 src0, float2 src1, float2 src2);\n" |
| 42790 | "float3 __ovld amd_median3(float3 src0, float3 src1, float3 src2);\n" |
| 42791 | "float4 __ovld amd_median3(float4 src0, float4 src1, float4 src2);\n" |
| 42792 | "float8 __ovld amd_median3(float8 src0, float8 src1, float8 src2);\n" |
| 42793 | "float16 __ovld amd_median3(float16 src0, float16 src1, float16 src2);\n" |
| 42794 | "\n" |
| 42795 | "int __ovld amd_median3(int src0, int src1, int src2);\n" |
| 42796 | "int2 __ovld amd_median3(int2 src0, int2 src1, int2 src2);\n" |
| 42797 | "int3 __ovld amd_median3(int3 src0, int3 src1, int3 src2);\n" |
| 42798 | "int4 __ovld amd_median3(int4 src0, int4 src1, int4 src2);\n" |
| 42799 | "int8 __ovld amd_median3(int8 src0, int8 src1, int8 src2);\n" |
| 42800 | "int16 __ovld amd_median3(int16 src0, int16 src1, int16 src2);\n" |
| 42801 | "\n" |
| 42802 | "uint __ovld amd_median3(uint src0, uint src1, uint src2);\n" |
| 42803 | "uint2 __ovld amd_median3(uint2 src0, uint2 src1, uint2 src2);\n" |
| 42804 | "uint3 __ovld amd_median3(uint3 src0, uint3 src1, uint3 src2);\n" |
| 42805 | "uint4 __ovld amd_median3(uint4 src0, uint4 src1, uint4 src2);\n" |
| 42806 | "uint8 __ovld amd_median3(uint8 src0, uint8 src1, uint8 src2);\n" |
| 42807 | "uint16 __ovld amd_median3(uint16 src0, uint16 src1, uint16 src2);\n" |
| 42808 | "\n" |
| 42809 | "float __ovld amd_min3(float src0, float src1, float src);\n" |
| 42810 | "float2 __ovld amd_min3(float2 src0, float2 src1, float2 src);\n" |
| 42811 | "float3 __ovld amd_min3(float3 src0, float3 src1, float3 src);\n" |
| 42812 | "float4 __ovld amd_min3(float4 src0, float4 src1, float4 src);\n" |
| 42813 | "float8 __ovld amd_min3(float8 src0, float8 src1, float8 src);\n" |
| 42814 | "float16 __ovld amd_min3(float16 src0, float16 src1, float16 src);\n" |
| 42815 | "\n" |
| 42816 | "int __ovld amd_min3(int src0, int src1, int src2);\n" |
| 42817 | "int2 __ovld amd_min3(int2 src0, int2 src1, int2 src2);\n" |
| 42818 | "int3 __ovld amd_min3(int3 src0, int3 src1, int3 src2);\n" |
| 42819 | "int4 __ovld amd_min3(int4 src0, int4 src1, int4 src2);\n" |
| 42820 | "int8 __ovld amd_min3(int8 src0, int8 src1, int8 src2);\n" |
| 42821 | "int16 __ovld amd_min3(int16 src0, int16 src1, int16 src2);\n" |
| 42822 | "\n" |
| 42823 | "uint __ovld amd_min3(uint src0, uint src1, uint src2);\n" |
| 42824 | "uint2 __ovld amd_min3(uint2 src0, uint2 src1, uint2 src2);\n" |
| 42825 | "uint3 __ovld amd_min3(uint3 src0, uint3 src1, uint3 src2);\n" |
| 42826 | "uint4 __ovld amd_min3(uint4 src0, uint4 src1, uint4 src2);\n" |
| 42827 | "uint8 __ovld amd_min3(uint8 src0, uint8 src1, uint8 src2);\n" |
| 42828 | "uint16 __ovld amd_min3(uint16 src0, uint16 src1, uint16 src2);\n" |
| 42829 | "\n" |
| 42830 | "ulong __ovld amd_mqsad(ulong src0, uint src1, ulong src2);\n" |
| 42831 | "ulong2 __ovld amd_mqsad(ulong2 src0, uint2 src1, ulong2 src2);\n" |
| 42832 | "ulong3 __ovld amd_mqsad(ulong3 src0, uint3 src1, ulong3 src2);\n" |
| 42833 | "ulong4 __ovld amd_mqsad(ulong4 src0, uint4 src1, ulong4 src2);\n" |
| 42834 | "ulong8 __ovld amd_mqsad(ulong8 src0, uint8 src1, ulong8 src2);\n" |
| 42835 | "ulong16 __ovld amd_mqsad(ulong16 src0, uint16 src1, ulong16 src2);\n" |
| 42836 | "\n" |
| 42837 | "ulong __ovld amd_qsad(ulong src0, uint src1, ulong src2);\n" |
| 42838 | "ulong2 __ovld amd_qsad(ulong2 src0, uint2 src1, ulong2 src2);\n" |
| 42839 | "ulong3 __ovld amd_qsad(ulong3 src0, uint3 src1, ulong3 src2);\n" |
| 42840 | "ulong4 __ovld amd_qsad(ulong4 src0, uint4 src1, ulong4 src2);\n" |
| 42841 | "ulong8 __ovld amd_qsad(ulong8 src0, uint8 src1, ulong8 src2);\n" |
| 42842 | "ulong16 __ovld amd_qsad(ulong16 src0, uint16 src1, ulong16 src2);\n" |
| 42843 | "\n" |
| 42844 | "uint __ovld amd_msad(uint src0, uint src1, uint src2);\n" |
| 42845 | "uint2 __ovld amd_msad(uint2 src0, uint2 src1, uint2 src2);\n" |
| 42846 | "uint3 __ovld amd_msad(uint3 src0, uint3 src1, uint3 src2);\n" |
| 42847 | "uint4 __ovld amd_msad(uint4 src0, uint4 src1, uint4 src2);\n" |
| 42848 | "uint8 __ovld amd_msad(uint8 src0, uint8 src1, uint8 src2);\n" |
| 42849 | "uint16 __ovld amd_msad(uint16 src0, uint16 src1, uint16 src2);\n" |
| 42850 | "\n" |
| 42851 | "uint __ovld amd_sadd(uint src0, uint src1, uint src2);\n" |
| 42852 | "uint2 __ovld amd_sadd(uint2 src0, uint2 src1, uint2 src2);\n" |
| 42853 | "uint3 __ovld amd_sadd(uint3 src0, uint3 src1, uint3 src2);\n" |
| 42854 | "uint4 __ovld amd_sadd(uint4 src0, uint4 src1, uint4 src2);\n" |
| 42855 | "uint8 __ovld amd_sadd(uint8 src0, uint8 src1, uint8 src2);\n" |
| 42856 | "uint16 __ovld amd_sadd(uint16 src0, uint16 src1, uint16 src2);\n" |
| 42857 | "\n" |
| 42858 | "uint __ovld amd_sadw(uint src0, uint src1, uint src2);\n" |
| 42859 | "uint2 __ovld amd_sadw(uint2 src0, uint2 src1, uint2 src2);\n" |
| 42860 | "uint3 __ovld amd_sadw(uint3 src0, uint3 src1, uint3 src2);\n" |
| 42861 | "uint4 __ovld amd_sadw(uint4 src0, uint4 src1, uint4 src2);\n" |
| 42862 | "uint8 __ovld amd_sadw(uint8 src0, uint8 src1, uint8 src2);\n" |
| 42863 | "uint16 __ovld amd_sadw(uint16 src0, uint16 src1, uint16 src2);\n" |
| 42864 | "#endif // cl_amd_media_ops2\n" |
| 42865 | "\n" |
| 42866 | "// Disable any extensions we may have enabled previously.\n" |
| 42867 | "#pragma OPENCL EXTENSION all : disable\n" |
| 42868 | "\n" |
| 42869 | "#undef __cnfn\n" |
| 42870 | "#undef __ovld\n" |
| 42871 | "#endif //_OPENCL_H_\n" |
| 42872 | "" } , |
| 42873 | { "/builtins/pconfigintrin.h" , "/*===---- pconfigintrin.h - X86 platform configuration ---------------------===\n" |
| 42874 | " *\n" |
| 42875 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 42876 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 42877 | " * in the Software without restriction, including without limitation the rights\n" |
| 42878 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 42879 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 42880 | " * furnished to do so, subject to the following conditions:\n" |
| 42881 | " *\n" |
| 42882 | " * The above copyright notice and this permission notice shall be included in\n" |
| 42883 | " * all copies or substantial portions of the Software.\n" |
| 42884 | " *\n" |
| 42885 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 42886 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 42887 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 42888 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 42889 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 42890 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 42891 | " * THE SOFTWARE.\n" |
| 42892 | " *\n" |
| 42893 | " *===-----------------------------------------------------------------------===\n" |
| 42894 | " */\n" |
| 42895 | "\n" |
| 42896 | "#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n" |
| 42897 | "#error \"Never use <pconfigintrin.h> directly; include <x86intrin.h> instead.\"\n" |
| 42898 | "#endif\n" |
| 42899 | "\n" |
| 42900 | "#ifndef __PCONFIGINTRIN_H\n" |
| 42901 | "#define __PCONFIGINTRIN_H\n" |
| 42902 | "\n" |
| 42903 | "#define __PCONFIG_KEY_PROGRAM 0x00000001\n" |
| 42904 | "\n" |
| 42905 | "/* Define the default attributes for the functions in this file. */\n" |
| 42906 | "#define __DEFAULT_FN_ATTRS \\\n" |
| 42907 | " __attribute__((__always_inline__, __nodebug__, __target__(\"pconfig\")))\n" |
| 42908 | "\n" |
| 42909 | "static __inline unsigned int __DEFAULT_FN_ATTRS\n" |
| 42910 | "_pconfig_u32(unsigned int __leaf, __SIZE_TYPE__ __d[])\n" |
| 42911 | "{\n" |
| 42912 | " unsigned int __result;\n" |
| 42913 | " __asm__ (\"pconfig\"\n" |
| 42914 | " : \"=a\" (__result), \"=b\" (__d[0]), \"=c\" (__d[1]), \"=d\" (__d[2])\n" |
| 42915 | " : \"a\" (__leaf), \"b\" (__d[0]), \"c\" (__d[1]), \"d\" (__d[2])\n" |
| 42916 | " : \"cc\");\n" |
| 42917 | " return __result;\n" |
| 42918 | "}\n" |
| 42919 | "\n" |
| 42920 | "#undef __DEFAULT_FN_ATTRS\n" |
| 42921 | "\n" |
| 42922 | "#endif\n" |
| 42923 | "" } , |
| 42924 | { "/builtins/pkuintrin.h" , "/*===---- pkuintrin.h - PKU intrinsics -------------------------------------===\n" |
| 42925 | " *\n" |
| 42926 | " *\n" |
| 42927 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 42928 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 42929 | " * in the Software without restriction, including without limitation the rights\n" |
| 42930 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 42931 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 42932 | " * furnished to do so, subject to the following conditions:\n" |
| 42933 | " *\n" |
| 42934 | " * The above copyright notice and this permission notice shall be included in\n" |
| 42935 | " * all copies or substantial portions of the Software.\n" |
| 42936 | " *\n" |
| 42937 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 42938 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 42939 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 42940 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 42941 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 42942 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 42943 | " * THE SOFTWARE.\n" |
| 42944 | " *\n" |
| 42945 | " *===-----------------------------------------------------------------------===\n" |
| 42946 | " */\n" |
| 42947 | "#ifndef __IMMINTRIN_H\n" |
| 42948 | "#error \"Never use <pkuintrin.h> directly; include <immintrin.h> instead.\"\n" |
| 42949 | "#endif\n" |
| 42950 | "\n" |
| 42951 | "#ifndef __PKUINTRIN_H\n" |
| 42952 | "#define __PKUINTRIN_H\n" |
| 42953 | "\n" |
| 42954 | "/* Define the default attributes for the functions in this file. */\n" |
| 42955 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"pku\")))\n" |
| 42956 | "\n" |
| 42957 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
| 42958 | "_rdpkru_u32(void)\n" |
| 42959 | "{\n" |
| 42960 | " return __builtin_ia32_rdpkru();\n" |
| 42961 | "}\n" |
| 42962 | "\n" |
| 42963 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 42964 | "_wrpkru(unsigned int __val)\n" |
| 42965 | "{\n" |
| 42966 | " __builtin_ia32_wrpkru(__val);\n" |
| 42967 | "}\n" |
| 42968 | "\n" |
| 42969 | "#undef __DEFAULT_FN_ATTRS\n" |
| 42970 | "\n" |
| 42971 | "#endif\n" |
| 42972 | "" } , |
| 42973 | { "/builtins/pmmintrin.h" , "/*===---- pmmintrin.h - SSE3 intrinsics ------------------------------------===\n" |
| 42974 | " *\n" |
| 42975 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 42976 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 42977 | " * in the Software without restriction, including without limitation the rights\n" |
| 42978 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 42979 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 42980 | " * furnished to do so, subject to the following conditions:\n" |
| 42981 | " *\n" |
| 42982 | " * The above copyright notice and this permission notice shall be included in\n" |
| 42983 | " * all copies or substantial portions of the Software.\n" |
| 42984 | " *\n" |
| 42985 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 42986 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 42987 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 42988 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 42989 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 42990 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 42991 | " * THE SOFTWARE.\n" |
| 42992 | " *\n" |
| 42993 | " *===-----------------------------------------------------------------------===\n" |
| 42994 | " */\n" |
| 42995 | "\n" |
| 42996 | "#ifndef __PMMINTRIN_H\n" |
| 42997 | "#define __PMMINTRIN_H\n" |
| 42998 | "\n" |
| 42999 | "#include <emmintrin.h>\n" |
| 43000 | "\n" |
| 43001 | "/* Define the default attributes for the functions in this file. */\n" |
| 43002 | "#define __DEFAULT_FN_ATTRS \\\n" |
| 43003 | " __attribute__((__always_inline__, __nodebug__, __target__(\"sse3\"), __min_vector_width__(128)))\n" |
| 43004 | "\n" |
| 43005 | "/// Loads data from an unaligned memory location to elements in a 128-bit\n" |
| 43006 | "/// vector.\n" |
| 43007 | "///\n" |
| 43008 | "/// If the address of the data is not 16-byte aligned, the instruction may\n" |
| 43009 | "/// read two adjacent aligned blocks of memory to retrieve the requested\n" |
| 43010 | "/// data.\n" |
| 43011 | "///\n" |
| 43012 | "/// \\headerfile <x86intrin.h>\n" |
| 43013 | "///\n" |
| 43014 | "/// This intrinsic corresponds to the <c> VLDDQU </c> instruction.\n" |
| 43015 | "///\n" |
| 43016 | "/// \\param __p\n" |
| 43017 | "/// A pointer to a 128-bit integer vector containing integer values.\n" |
| 43018 | "/// \\returns A 128-bit vector containing the moved values.\n" |
| 43019 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 43020 | "_mm_lddqu_si128(__m128i const *__p)\n" |
| 43021 | "{\n" |
| 43022 | " return (__m128i)__builtin_ia32_lddqu((char const *)__p);\n" |
| 43023 | "}\n" |
| 43024 | "\n" |
| 43025 | "/// Adds the even-indexed values and subtracts the odd-indexed values of\n" |
| 43026 | "/// two 128-bit vectors of [4 x float].\n" |
| 43027 | "///\n" |
| 43028 | "/// \\headerfile <x86intrin.h>\n" |
| 43029 | "///\n" |
| 43030 | "/// This intrinsic corresponds to the <c> VADDSUBPS </c> instruction.\n" |
| 43031 | "///\n" |
| 43032 | "/// \\param __a\n" |
| 43033 | "/// A 128-bit vector of [4 x float] containing the left source operand.\n" |
| 43034 | "/// \\param __b\n" |
| 43035 | "/// A 128-bit vector of [4 x float] containing the right source operand.\n" |
| 43036 | "/// \\returns A 128-bit vector of [4 x float] containing the alternating sums and\n" |
| 43037 | "/// differences of both operands.\n" |
| 43038 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 43039 | "_mm_addsub_ps(__m128 __a, __m128 __b)\n" |
| 43040 | "{\n" |
| 43041 | " return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b);\n" |
| 43042 | "}\n" |
| 43043 | "\n" |
| 43044 | "/// Horizontally adds the adjacent pairs of values contained in two\n" |
| 43045 | "/// 128-bit vectors of [4 x float].\n" |
| 43046 | "///\n" |
| 43047 | "/// \\headerfile <x86intrin.h>\n" |
| 43048 | "///\n" |
| 43049 | "/// This intrinsic corresponds to the <c> VHADDPS </c> instruction.\n" |
| 43050 | "///\n" |
| 43051 | "/// \\param __a\n" |
| 43052 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
| 43053 | "/// The horizontal sums of the values are stored in the lower bits of the\n" |
| 43054 | "/// destination.\n" |
| 43055 | "/// \\param __b\n" |
| 43056 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
| 43057 | "/// The horizontal sums of the values are stored in the upper bits of the\n" |
| 43058 | "/// destination.\n" |
| 43059 | "/// \\returns A 128-bit vector of [4 x float] containing the horizontal sums of\n" |
| 43060 | "/// both operands.\n" |
| 43061 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 43062 | "_mm_hadd_ps(__m128 __a, __m128 __b)\n" |
| 43063 | "{\n" |
| 43064 | " return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b);\n" |
| 43065 | "}\n" |
| 43066 | "\n" |
| 43067 | "/// Horizontally subtracts the adjacent pairs of values contained in two\n" |
| 43068 | "/// 128-bit vectors of [4 x float].\n" |
| 43069 | "///\n" |
| 43070 | "/// \\headerfile <x86intrin.h>\n" |
| 43071 | "///\n" |
| 43072 | "/// This intrinsic corresponds to the <c> VHSUBPS </c> instruction.\n" |
| 43073 | "///\n" |
| 43074 | "/// \\param __a\n" |
| 43075 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
| 43076 | "/// The horizontal differences between the values are stored in the lower\n" |
| 43077 | "/// bits of the destination.\n" |
| 43078 | "/// \\param __b\n" |
| 43079 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
| 43080 | "/// The horizontal differences between the values are stored in the upper\n" |
| 43081 | "/// bits of the destination.\n" |
| 43082 | "/// \\returns A 128-bit vector of [4 x float] containing the horizontal\n" |
| 43083 | "/// differences of both operands.\n" |
| 43084 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 43085 | "_mm_hsub_ps(__m128 __a, __m128 __b)\n" |
| 43086 | "{\n" |
| 43087 | " return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b);\n" |
| 43088 | "}\n" |
| 43089 | "\n" |
| 43090 | "/// Moves and duplicates odd-indexed values from a 128-bit vector\n" |
| 43091 | "/// of [4 x float] to float values stored in a 128-bit vector of\n" |
| 43092 | "/// [4 x float].\n" |
| 43093 | "///\n" |
| 43094 | "/// \\headerfile <x86intrin.h>\n" |
| 43095 | "///\n" |
| 43096 | "/// This intrinsic corresponds to the <c> VMOVSHDUP </c> instruction.\n" |
| 43097 | "///\n" |
| 43098 | "/// \\param __a\n" |
| 43099 | "/// A 128-bit vector of [4 x float]. \\n\n" |
| 43100 | "/// Bits [127:96] of the source are written to bits [127:96] and [95:64] of\n" |
| 43101 | "/// the destination. \\n\n" |
| 43102 | "/// Bits [63:32] of the source are written to bits [63:32] and [31:0] of the\n" |
| 43103 | "/// destination.\n" |
| 43104 | "/// \\returns A 128-bit vector of [4 x float] containing the moved and duplicated\n" |
| 43105 | "/// values.\n" |
| 43106 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 43107 | "_mm_movehdup_ps(__m128 __a)\n" |
| 43108 | "{\n" |
| 43109 | " return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3);\n" |
| 43110 | "}\n" |
| 43111 | "\n" |
| 43112 | "/// Duplicates even-indexed values from a 128-bit vector of\n" |
| 43113 | "/// [4 x float] to float values stored in a 128-bit vector of [4 x float].\n" |
| 43114 | "///\n" |
| 43115 | "/// \\headerfile <x86intrin.h>\n" |
| 43116 | "///\n" |
| 43117 | "/// This intrinsic corresponds to the <c> VMOVSLDUP </c> instruction.\n" |
| 43118 | "///\n" |
| 43119 | "/// \\param __a\n" |
| 43120 | "/// A 128-bit vector of [4 x float] \\n\n" |
| 43121 | "/// Bits [95:64] of the source are written to bits [127:96] and [95:64] of\n" |
| 43122 | "/// the destination. \\n\n" |
| 43123 | "/// Bits [31:0] of the source are written to bits [63:32] and [31:0] of the\n" |
| 43124 | "/// destination.\n" |
| 43125 | "/// \\returns A 128-bit vector of [4 x float] containing the moved and duplicated\n" |
| 43126 | "/// values.\n" |
| 43127 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 43128 | "_mm_moveldup_ps(__m128 __a)\n" |
| 43129 | "{\n" |
| 43130 | " return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 2, 2);\n" |
| 43131 | "}\n" |
| 43132 | "\n" |
| 43133 | "/// Adds the even-indexed values and subtracts the odd-indexed values of\n" |
| 43134 | "/// two 128-bit vectors of [2 x double].\n" |
| 43135 | "///\n" |
| 43136 | "/// \\headerfile <x86intrin.h>\n" |
| 43137 | "///\n" |
| 43138 | "/// This intrinsic corresponds to the <c> VADDSUBPD </c> instruction.\n" |
| 43139 | "///\n" |
| 43140 | "/// \\param __a\n" |
| 43141 | "/// A 128-bit vector of [2 x double] containing the left source operand.\n" |
| 43142 | "/// \\param __b\n" |
| 43143 | "/// A 128-bit vector of [2 x double] containing the right source operand.\n" |
| 43144 | "/// \\returns A 128-bit vector of [2 x double] containing the alternating sums\n" |
| 43145 | "/// and differences of both operands.\n" |
| 43146 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 43147 | "_mm_addsub_pd(__m128d __a, __m128d __b)\n" |
| 43148 | "{\n" |
| 43149 | " return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b);\n" |
| 43150 | "}\n" |
| 43151 | "\n" |
| 43152 | "/// Horizontally adds the pairs of values contained in two 128-bit\n" |
| 43153 | "/// vectors of [2 x double].\n" |
| 43154 | "///\n" |
| 43155 | "/// \\headerfile <x86intrin.h>\n" |
| 43156 | "///\n" |
| 43157 | "/// This intrinsic corresponds to the <c> VHADDPD </c> instruction.\n" |
| 43158 | "///\n" |
| 43159 | "/// \\param __a\n" |
| 43160 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
| 43161 | "/// The horizontal sum of the values is stored in the lower bits of the\n" |
| 43162 | "/// destination.\n" |
| 43163 | "/// \\param __b\n" |
| 43164 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
| 43165 | "/// The horizontal sum of the values is stored in the upper bits of the\n" |
| 43166 | "/// destination.\n" |
| 43167 | "/// \\returns A 128-bit vector of [2 x double] containing the horizontal sums of\n" |
| 43168 | "/// both operands.\n" |
| 43169 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 43170 | "_mm_hadd_pd(__m128d __a, __m128d __b)\n" |
| 43171 | "{\n" |
| 43172 | " return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b);\n" |
| 43173 | "}\n" |
| 43174 | "\n" |
| 43175 | "/// Horizontally subtracts the pairs of values contained in two 128-bit\n" |
| 43176 | "/// vectors of [2 x double].\n" |
| 43177 | "///\n" |
| 43178 | "/// \\headerfile <x86intrin.h>\n" |
| 43179 | "///\n" |
| 43180 | "/// This intrinsic corresponds to the <c> VHSUBPD </c> instruction.\n" |
| 43181 | "///\n" |
| 43182 | "/// \\param __a\n" |
| 43183 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
| 43184 | "/// The horizontal difference of the values is stored in the lower bits of\n" |
| 43185 | "/// the destination.\n" |
| 43186 | "/// \\param __b\n" |
| 43187 | "/// A 128-bit vector of [2 x double] containing one of the source operands.\n" |
| 43188 | "/// The horizontal difference of the values is stored in the upper bits of\n" |
| 43189 | "/// the destination.\n" |
| 43190 | "/// \\returns A 128-bit vector of [2 x double] containing the horizontal\n" |
| 43191 | "/// differences of both operands.\n" |
| 43192 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 43193 | "_mm_hsub_pd(__m128d __a, __m128d __b)\n" |
| 43194 | "{\n" |
| 43195 | " return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b);\n" |
| 43196 | "}\n" |
| 43197 | "\n" |
| 43198 | "/// Moves and duplicates one double-precision value to double-precision\n" |
| 43199 | "/// values stored in a 128-bit vector of [2 x double].\n" |
| 43200 | "///\n" |
| 43201 | "/// \\headerfile <x86intrin.h>\n" |
| 43202 | "///\n" |
| 43203 | "/// \\code\n" |
| 43204 | "/// __m128d _mm_loaddup_pd(double const *dp);\n" |
| 43205 | "/// \\endcode\n" |
| 43206 | "///\n" |
| 43207 | "/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.\n" |
| 43208 | "///\n" |
| 43209 | "/// \\param dp\n" |
| 43210 | "/// A pointer to a double-precision value to be moved and duplicated.\n" |
| 43211 | "/// \\returns A 128-bit vector of [2 x double] containing the moved and\n" |
| 43212 | "/// duplicated values.\n" |
| 43213 | "#define _mm_loaddup_pd(dp) _mm_load1_pd(dp)\n" |
| 43214 | "\n" |
| 43215 | "/// Moves and duplicates the double-precision value in the lower bits of\n" |
| 43216 | "/// a 128-bit vector of [2 x double] to double-precision values stored in a\n" |
| 43217 | "/// 128-bit vector of [2 x double].\n" |
| 43218 | "///\n" |
| 43219 | "/// \\headerfile <x86intrin.h>\n" |
| 43220 | "///\n" |
| 43221 | "/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.\n" |
| 43222 | "///\n" |
| 43223 | "/// \\param __a\n" |
| 43224 | "/// A 128-bit vector of [2 x double]. Bits [63:0] are written to bits\n" |
| 43225 | "/// [127:64] and [63:0] of the destination.\n" |
| 43226 | "/// \\returns A 128-bit vector of [2 x double] containing the moved and\n" |
| 43227 | "/// duplicated values.\n" |
| 43228 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 43229 | "_mm_movedup_pd(__m128d __a)\n" |
| 43230 | "{\n" |
| 43231 | " return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);\n" |
| 43232 | "}\n" |
| 43233 | "\n" |
| 43234 | "/// Establishes a linear address memory range to be monitored and puts\n" |
| 43235 | "/// the processor in the monitor event pending state. Data stored in the\n" |
| 43236 | "/// monitored address range causes the processor to exit the pending state.\n" |
| 43237 | "///\n" |
| 43238 | "/// \\headerfile <x86intrin.h>\n" |
| 43239 | "///\n" |
| 43240 | "/// This intrinsic corresponds to the <c> MONITOR </c> instruction.\n" |
| 43241 | "///\n" |
| 43242 | "/// \\param __p\n" |
| 43243 | "/// The memory range to be monitored. The size of the range is determined by\n" |
| 43244 | "/// CPUID function 0000_0005h.\n" |
| 43245 | "/// \\param __extensions\n" |
| 43246 | "/// Optional extensions for the monitoring state.\n" |
| 43247 | "/// \\param __hints\n" |
| 43248 | "/// Optional hints for the monitoring state.\n" |
| 43249 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 43250 | "_mm_monitor(void const *__p, unsigned __extensions, unsigned __hints)\n" |
| 43251 | "{\n" |
| 43252 | " __builtin_ia32_monitor((void *)__p, __extensions, __hints);\n" |
| 43253 | "}\n" |
| 43254 | "\n" |
| 43255 | "/// Used with the MONITOR instruction to wait while the processor is in\n" |
| 43256 | "/// the monitor event pending state. Data stored in the monitored address\n" |
| 43257 | "/// range causes the processor to exit the pending state.\n" |
| 43258 | "///\n" |
| 43259 | "/// \\headerfile <x86intrin.h>\n" |
| 43260 | "///\n" |
| 43261 | "/// This intrinsic corresponds to the <c> MWAIT </c> instruction.\n" |
| 43262 | "///\n" |
| 43263 | "/// \\param __extensions\n" |
| 43264 | "/// Optional extensions for the monitoring state, which may vary by\n" |
| 43265 | "/// processor.\n" |
| 43266 | "/// \\param __hints\n" |
| 43267 | "/// Optional hints for the monitoring state, which may vary by processor.\n" |
| 43268 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 43269 | "_mm_mwait(unsigned __extensions, unsigned __hints)\n" |
| 43270 | "{\n" |
| 43271 | " __builtin_ia32_mwait(__extensions, __hints);\n" |
| 43272 | "}\n" |
| 43273 | "\n" |
| 43274 | "#undef __DEFAULT_FN_ATTRS\n" |
| 43275 | "\n" |
| 43276 | "#endif /* __PMMINTRIN_H */\n" |
| 43277 | "" } , |
| 43278 | { "/builtins/popcntintrin.h" , "/*===---- popcntintrin.h - POPCNT intrinsics -------------------------------===\n" |
| 43279 | " *\n" |
| 43280 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 43281 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 43282 | " * in the Software without restriction, including without limitation the rights\n" |
| 43283 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 43284 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 43285 | " * furnished to do so, subject to the following conditions:\n" |
| 43286 | " *\n" |
| 43287 | " * The above copyright notice and this permission notice shall be included in\n" |
| 43288 | " * all copies or substantial portions of the Software.\n" |
| 43289 | " *\n" |
| 43290 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 43291 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 43292 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 43293 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 43294 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 43295 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 43296 | " * THE SOFTWARE.\n" |
| 43297 | " *\n" |
| 43298 | " *===-----------------------------------------------------------------------===\n" |
| 43299 | " */\n" |
| 43300 | "\n" |
| 43301 | "#ifndef __POPCNTINTRIN_H\n" |
| 43302 | "#define __POPCNTINTRIN_H\n" |
| 43303 | "\n" |
| 43304 | "/* Define the default attributes for the functions in this file. */\n" |
| 43305 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"popcnt\")))\n" |
| 43306 | "\n" |
| 43307 | "/// Counts the number of bits in the source operand having a value of 1.\n" |
| 43308 | "///\n" |
| 43309 | "/// \\headerfile <x86intrin.h>\n" |
| 43310 | "///\n" |
| 43311 | "/// This intrinsic corresponds to the <c> POPCNT </c> instruction.\n" |
| 43312 | "///\n" |
| 43313 | "/// \\param __A\n" |
| 43314 | "/// An unsigned 32-bit integer operand.\n" |
| 43315 | "/// \\returns A 32-bit integer containing the number of bits with value 1 in the\n" |
| 43316 | "/// source operand.\n" |
| 43317 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 43318 | "_mm_popcnt_u32(unsigned int __A)\n" |
| 43319 | "{\n" |
| 43320 | " return __builtin_popcount(__A);\n" |
| 43321 | "}\n" |
| 43322 | "\n" |
| 43323 | "/// Counts the number of bits in the source operand having a value of 1.\n" |
| 43324 | "///\n" |
| 43325 | "/// \\headerfile <x86intrin.h>\n" |
| 43326 | "///\n" |
| 43327 | "/// This intrinsic corresponds to the <c> POPCNT </c> instruction.\n" |
| 43328 | "///\n" |
| 43329 | "/// \\param __A\n" |
| 43330 | "/// A signed 32-bit integer operand.\n" |
| 43331 | "/// \\returns A 32-bit integer containing the number of bits with value 1 in the\n" |
| 43332 | "/// source operand.\n" |
| 43333 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 43334 | "_popcnt32(int __A)\n" |
| 43335 | "{\n" |
| 43336 | " return __builtin_popcount(__A);\n" |
| 43337 | "}\n" |
| 43338 | "\n" |
| 43339 | "#ifdef __x86_64__\n" |
| 43340 | "/// Counts the number of bits in the source operand having a value of 1.\n" |
| 43341 | "///\n" |
| 43342 | "/// \\headerfile <x86intrin.h>\n" |
| 43343 | "///\n" |
| 43344 | "/// This intrinsic corresponds to the <c> POPCNT </c> instruction.\n" |
| 43345 | "///\n" |
| 43346 | "/// \\param __A\n" |
| 43347 | "/// An unsigned 64-bit integer operand.\n" |
| 43348 | "/// \\returns A 64-bit integer containing the number of bits with value 1 in the\n" |
| 43349 | "/// source operand.\n" |
| 43350 | "static __inline__ long long __DEFAULT_FN_ATTRS\n" |
| 43351 | "_mm_popcnt_u64(unsigned long long __A)\n" |
| 43352 | "{\n" |
| 43353 | " return __builtin_popcountll(__A);\n" |
| 43354 | "}\n" |
| 43355 | "\n" |
| 43356 | "/// Counts the number of bits in the source operand having a value of 1.\n" |
| 43357 | "///\n" |
| 43358 | "/// \\headerfile <x86intrin.h>\n" |
| 43359 | "///\n" |
| 43360 | "/// This intrinsic corresponds to the <c> POPCNT </c> instruction.\n" |
| 43361 | "///\n" |
| 43362 | "/// \\param __A\n" |
| 43363 | "/// A signed 64-bit integer operand.\n" |
| 43364 | "/// \\returns A 64-bit integer containing the number of bits with value 1 in the\n" |
| 43365 | "/// source operand.\n" |
| 43366 | "static __inline__ long long __DEFAULT_FN_ATTRS\n" |
| 43367 | "_popcnt64(long long __A)\n" |
| 43368 | "{\n" |
| 43369 | " return __builtin_popcountll(__A);\n" |
| 43370 | "}\n" |
| 43371 | "#endif /* __x86_64__ */\n" |
| 43372 | "\n" |
| 43373 | "#undef __DEFAULT_FN_ATTRS\n" |
| 43374 | "\n" |
| 43375 | "#endif /* __POPCNTINTRIN_H */\n" |
| 43376 | "" } , |
| 43377 | { "/builtins/prfchwintrin.h" , "/*===---- prfchwintrin.h - PREFETCHW intrinsic -----------------------------===\n" |
| 43378 | " *\n" |
| 43379 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 43380 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 43381 | " * in the Software without restriction, including without limitation the rights\n" |
| 43382 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 43383 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 43384 | " * furnished to do so, subject to the following conditions:\n" |
| 43385 | " *\n" |
| 43386 | " * The above copyright notice and this permission notice shall be included in\n" |
| 43387 | " * all copies or substantial portions of the Software.\n" |
| 43388 | " *\n" |
| 43389 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 43390 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 43391 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 43392 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 43393 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 43394 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 43395 | " * THE SOFTWARE.\n" |
| 43396 | " *\n" |
| 43397 | " *===-----------------------------------------------------------------------===\n" |
| 43398 | " */\n" |
| 43399 | "\n" |
| 43400 | "#if !defined(__X86INTRIN_H) && !defined(_MM3DNOW_H_INCLUDED)\n" |
| 43401 | "#error \"Never use <prfchwintrin.h> directly; include <x86intrin.h> or <mm3dnow.h> instead.\"\n" |
| 43402 | "#endif\n" |
| 43403 | "\n" |
| 43404 | "#ifndef __PRFCHWINTRIN_H\n" |
| 43405 | "#define __PRFCHWINTRIN_H\n" |
| 43406 | "\n" |
| 43407 | "/// Loads a memory sequence containing the specified memory address into\n" |
| 43408 | "/// all data cache levels. The cache-coherency state is set to exclusive.\n" |
| 43409 | "/// Data can be read from and written to the cache line without additional\n" |
| 43410 | "/// delay.\n" |
| 43411 | "///\n" |
| 43412 | "/// \\headerfile <x86intrin.h>\n" |
| 43413 | "///\n" |
| 43414 | "/// This intrinsic corresponds to the \\c PREFETCHT0 instruction.\n" |
| 43415 | "///\n" |
| 43416 | "/// \\param __P\n" |
| 43417 | "/// A pointer specifying the memory address to be prefetched.\n" |
| 43418 | "static __inline__ void __attribute__((__always_inline__, __nodebug__))\n" |
| 43419 | "_m_prefetch(void *__P)\n" |
| 43420 | "{\n" |
| 43421 | " __builtin_prefetch (__P, 0, 3 /* _MM_HINT_T0 */);\n" |
| 43422 | "}\n" |
| 43423 | "\n" |
| 43424 | "/// Loads a memory sequence containing the specified memory address into\n" |
| 43425 | "/// the L1 data cache and sets the cache-coherency to modified. This\n" |
| 43426 | "/// provides a hint to the processor that the cache line will be modified.\n" |
| 43427 | "/// It is intended for use when the cache line will be written to shortly\n" |
| 43428 | "/// after the prefetch is performed.\n" |
| 43429 | "///\n" |
| 43430 | "/// Note that the effect of this intrinsic is dependent on the processor\n" |
| 43431 | "/// implementation.\n" |
| 43432 | "///\n" |
| 43433 | "/// \\headerfile <x86intrin.h>\n" |
| 43434 | "///\n" |
| 43435 | "/// This intrinsic corresponds to the \\c PREFETCHW instruction.\n" |
| 43436 | "///\n" |
| 43437 | "/// \\param __P\n" |
| 43438 | "/// A pointer specifying the memory address to be prefetched.\n" |
| 43439 | "static __inline__ void __attribute__((__always_inline__, __nodebug__))\n" |
| 43440 | "_m_prefetchw(void *__P)\n" |
| 43441 | "{\n" |
| 43442 | " __builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */);\n" |
| 43443 | "}\n" |
| 43444 | "\n" |
| 43445 | "#endif /* __PRFCHWINTRIN_H */\n" |
| 43446 | "" } , |
| 43447 | { "/builtins/ptwriteintrin.h" , "/*===------------ ptwriteintrin.h - PTWRITE intrinsic --------------------===\n" |
| 43448 | " *\n" |
| 43449 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 43450 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 43451 | " * in the Software without restriction, including without limitation the rights\n" |
| 43452 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 43453 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 43454 | " * furnished to do so, subject to the following conditions:\n" |
| 43455 | " *\n" |
| 43456 | " * The above copyright notice and this permission notice shall be included in\n" |
| 43457 | " * all copies or substantial portions of the Software.\n" |
| 43458 | " *\n" |
| 43459 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 43460 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 43461 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 43462 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 43463 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 43464 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 43465 | " * THE SOFTWARE.\n" |
| 43466 | " *\n" |
| 43467 | " *===-----------------------------------------------------------------------===\n" |
| 43468 | " */\n" |
| 43469 | "\n" |
| 43470 | "#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n" |
| 43471 | "#error \"Never use <ptwriteintrin.h> directly; include <x86intrin.h> instead.\"\n" |
| 43472 | "#endif\n" |
| 43473 | "\n" |
| 43474 | "#ifndef __PTWRITEINTRIN_H\n" |
| 43475 | "#define __PTWRITEINTRIN_H\n" |
| 43476 | "\n" |
| 43477 | "/* Define the default attributes for the functions in this file. */\n" |
| 43478 | "#define __DEFAULT_FN_ATTRS \\\n" |
| 43479 | " __attribute__((__always_inline__, __nodebug__, __target__(\"ptwrite\")))\n" |
| 43480 | "\n" |
| 43481 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 43482 | "_ptwrite32(unsigned int __value) {\n" |
| 43483 | " __builtin_ia32_ptwrite32(__value);\n" |
| 43484 | "}\n" |
| 43485 | "\n" |
| 43486 | "#ifdef __x86_64__\n" |
| 43487 | "\n" |
| 43488 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 43489 | "_ptwrite64(unsigned long long __value) {\n" |
| 43490 | " __builtin_ia32_ptwrite64(__value);\n" |
| 43491 | "}\n" |
| 43492 | "\n" |
| 43493 | "#endif /* __x86_64__ */\n" |
| 43494 | "\n" |
| 43495 | "#undef __DEFAULT_FN_ATTRS\n" |
| 43496 | "\n" |
| 43497 | "#endif /* __PTWRITEINTRIN_H */\n" |
| 43498 | "" } , |
| 43499 | { "/builtins/rdseedintrin.h" , "/*===---- rdseedintrin.h - RDSEED intrinsics -------------------------------===\n" |
| 43500 | " *\n" |
| 43501 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 43502 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 43503 | " * in the Software without restriction, including without limitation the rights\n" |
| 43504 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 43505 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 43506 | " * furnished to do so, subject to the following conditions:\n" |
| 43507 | " *\n" |
| 43508 | " * The above copyright notice and this permission notice shall be included in\n" |
| 43509 | " * all copies or substantial portions of the Software.\n" |
| 43510 | " *\n" |
| 43511 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 43512 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 43513 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 43514 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 43515 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 43516 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 43517 | " * THE SOFTWARE.\n" |
| 43518 | " *\n" |
| 43519 | " *===-----------------------------------------------------------------------===\n" |
| 43520 | " */\n" |
| 43521 | "\n" |
| 43522 | "#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n" |
| 43523 | "#error \"Never use <rdseedintrin.h> directly; include <x86intrin.h> instead.\"\n" |
| 43524 | "#endif\n" |
| 43525 | "\n" |
| 43526 | "#ifndef __RDSEEDINTRIN_H\n" |
| 43527 | "#define __RDSEEDINTRIN_H\n" |
| 43528 | "\n" |
| 43529 | "/* Define the default attributes for the functions in this file. */\n" |
| 43530 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"rdseed\")))\n" |
| 43531 | "\n" |
| 43532 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 43533 | "_rdseed16_step(unsigned short *__p)\n" |
| 43534 | "{\n" |
| 43535 | " return __builtin_ia32_rdseed16_step(__p);\n" |
| 43536 | "}\n" |
| 43537 | "\n" |
| 43538 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 43539 | "_rdseed32_step(unsigned int *__p)\n" |
| 43540 | "{\n" |
| 43541 | " return __builtin_ia32_rdseed32_step(__p);\n" |
| 43542 | "}\n" |
| 43543 | "\n" |
| 43544 | "#ifdef __x86_64__\n" |
| 43545 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 43546 | "_rdseed64_step(unsigned long long *__p)\n" |
| 43547 | "{\n" |
| 43548 | " return __builtin_ia32_rdseed64_step(__p);\n" |
| 43549 | "}\n" |
| 43550 | "#endif\n" |
| 43551 | "\n" |
| 43552 | "#undef __DEFAULT_FN_ATTRS\n" |
| 43553 | "\n" |
| 43554 | "#endif /* __RDSEEDINTRIN_H */\n" |
| 43555 | "" } , |
| 43556 | { "/builtins/rtmintrin.h" , "/*===---- rtmintrin.h - RTM intrinsics -------------------------------------===\n" |
| 43557 | " *\n" |
| 43558 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 43559 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 43560 | " * in the Software without restriction, including without limitation the rights\n" |
| 43561 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 43562 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 43563 | " * furnished to do so, subject to the following conditions:\n" |
| 43564 | " *\n" |
| 43565 | " * The above copyright notice and this permission notice shall be included in\n" |
| 43566 | " * all copies or substantial portions of the Software.\n" |
| 43567 | " *\n" |
| 43568 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 43569 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 43570 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 43571 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 43572 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 43573 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 43574 | " * THE SOFTWARE.\n" |
| 43575 | " *\n" |
| 43576 | " *===-----------------------------------------------------------------------===\n" |
| 43577 | " */\n" |
| 43578 | "\n" |
| 43579 | "#ifndef __IMMINTRIN_H\n" |
| 43580 | "#error \"Never use <rtmintrin.h> directly; include <immintrin.h> instead.\"\n" |
| 43581 | "#endif\n" |
| 43582 | "\n" |
| 43583 | "#ifndef __RTMINTRIN_H\n" |
| 43584 | "#define __RTMINTRIN_H\n" |
| 43585 | "\n" |
| 43586 | "#define _XBEGIN_STARTED (~0u)\n" |
| 43587 | "#define _XABORT_EXPLICIT (1 << 0)\n" |
| 43588 | "#define _XABORT_RETRY (1 << 1)\n" |
| 43589 | "#define _XABORT_CONFLICT (1 << 2)\n" |
| 43590 | "#define _XABORT_CAPACITY (1 << 3)\n" |
| 43591 | "#define _XABORT_DEBUG (1 << 4)\n" |
| 43592 | "#define _XABORT_NESTED (1 << 5)\n" |
| 43593 | "#define _XABORT_CODE(x) (((x) >> 24) & 0xFF)\n" |
| 43594 | "\n" |
| 43595 | "/* Define the default attributes for the functions in this file. */\n" |
| 43596 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"rtm\")))\n" |
| 43597 | "\n" |
| 43598 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
| 43599 | "_xbegin(void)\n" |
| 43600 | "{\n" |
| 43601 | " return __builtin_ia32_xbegin();\n" |
| 43602 | "}\n" |
| 43603 | "\n" |
| 43604 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 43605 | "_xend(void)\n" |
| 43606 | "{\n" |
| 43607 | " __builtin_ia32_xend();\n" |
| 43608 | "}\n" |
| 43609 | "\n" |
| 43610 | "#define _xabort(imm) __builtin_ia32_xabort((imm))\n" |
| 43611 | "\n" |
| 43612 | "#undef __DEFAULT_FN_ATTRS\n" |
| 43613 | "\n" |
| 43614 | "#endif /* __RTMINTRIN_H */\n" |
| 43615 | "" } , |
| 43616 | { "/builtins/s390intrin.h" , "/*===---- s390intrin.h - SystemZ intrinsics --------------------------------===\n" |
| 43617 | " *\n" |
| 43618 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 43619 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 43620 | " * in the Software without restriction, including without limitation the rights\n" |
| 43621 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 43622 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 43623 | " * furnished to do so, subject to the following conditions:\n" |
| 43624 | " *\n" |
| 43625 | " * The above copyright notice and this permission notice shall be included in\n" |
| 43626 | " * all copies or substantial portions of the Software.\n" |
| 43627 | " *\n" |
| 43628 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 43629 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 43630 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 43631 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 43632 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 43633 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 43634 | " * THE SOFTWARE.\n" |
| 43635 | " *\n" |
| 43636 | " *===-----------------------------------------------------------------------===\n" |
| 43637 | " */\n" |
| 43638 | "\n" |
| 43639 | "#ifndef __S390INTRIN_H\n" |
| 43640 | "#define __S390INTRIN_H\n" |
| 43641 | "\n" |
| 43642 | "#ifndef __s390__\n" |
| 43643 | "#error \"<s390intrin.h> is for s390 only\"\n" |
| 43644 | "#endif\n" |
| 43645 | "\n" |
| 43646 | "#ifdef __HTM__\n" |
| 43647 | "#include <htmintrin.h>\n" |
| 43648 | "#endif\n" |
| 43649 | "\n" |
| 43650 | "#ifdef __VEC__\n" |
| 43651 | "#include <vecintrin.h>\n" |
| 43652 | "#endif\n" |
| 43653 | "\n" |
| 43654 | "#endif /* __S390INTRIN_H*/\n" |
| 43655 | "" } , |
| 43656 | { "/builtins/sgxintrin.h" , "/*===---- sgxintrin.h - X86 SGX intrinsics configuration -------------------===\n" |
| 43657 | " *\n" |
| 43658 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 43659 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 43660 | " * in the Software without restriction, including without limitation the rights\n" |
| 43661 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 43662 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 43663 | " * furnished to do so, subject to the following conditions:\n" |
| 43664 | " *\n" |
| 43665 | " * The above copyright notice and this permission notice shall be included in\n" |
| 43666 | " * all copies or substantial portions of the Software.\n" |
| 43667 | " *\n" |
| 43668 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 43669 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 43670 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 43671 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 43672 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 43673 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 43674 | " * THE SOFTWARE.\n" |
| 43675 | " *\n" |
| 43676 | " *===-----------------------------------------------------------------------===\n" |
| 43677 | " */\n" |
| 43678 | "\n" |
| 43679 | "#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n" |
| 43680 | "#error \"Never use <sgxintrin.h> directly; include <x86intrin.h> instead.\"\n" |
| 43681 | "#endif\n" |
| 43682 | "\n" |
| 43683 | "#ifndef __SGXINTRIN_H\n" |
| 43684 | "#define __SGXINTRIN_H\n" |
| 43685 | "\n" |
| 43686 | "/* Define the default attributes for the functions in this file. */\n" |
| 43687 | "#define __DEFAULT_FN_ATTRS \\\n" |
| 43688 | " __attribute__((__always_inline__, __nodebug__, __target__(\"sgx\")))\n" |
| 43689 | "\n" |
| 43690 | "static __inline unsigned int __DEFAULT_FN_ATTRS\n" |
| 43691 | "_enclu_u32(unsigned int __leaf, __SIZE_TYPE__ __d[])\n" |
| 43692 | "{\n" |
| 43693 | " unsigned int __result;\n" |
| 43694 | " __asm__ (\"enclu\"\n" |
| 43695 | " : \"=a\" (__result), \"=b\" (__d[0]), \"=c\" (__d[1]), \"=d\" (__d[2])\n" |
| 43696 | " : \"a\" (__leaf), \"b\" (__d[0]), \"c\" (__d[1]), \"d\" (__d[2])\n" |
| 43697 | " : \"cc\");\n" |
| 43698 | " return __result;\n" |
| 43699 | "}\n" |
| 43700 | "\n" |
| 43701 | "static __inline unsigned int __DEFAULT_FN_ATTRS\n" |
| 43702 | "_encls_u32(unsigned int __leaf, __SIZE_TYPE__ __d[])\n" |
| 43703 | "{\n" |
| 43704 | " unsigned int __result;\n" |
| 43705 | " __asm__ (\"encls\"\n" |
| 43706 | " : \"=a\" (__result), \"=b\" (__d[0]), \"=c\" (__d[1]), \"=d\" (__d[2])\n" |
| 43707 | " : \"a\" (__leaf), \"b\" (__d[0]), \"c\" (__d[1]), \"d\" (__d[2])\n" |
| 43708 | " : \"cc\");\n" |
| 43709 | " return __result;\n" |
| 43710 | "}\n" |
| 43711 | "\n" |
| 43712 | "static __inline unsigned int __DEFAULT_FN_ATTRS\n" |
| 43713 | "_enclv_u32(unsigned int __leaf, __SIZE_TYPE__ __d[])\n" |
| 43714 | "{\n" |
| 43715 | " unsigned int __result;\n" |
| 43716 | " __asm__ (\"enclv\"\n" |
| 43717 | " : \"=a\" (__result), \"=b\" (__d[0]), \"=c\" (__d[1]), \"=d\" (__d[2])\n" |
| 43718 | " : \"a\" (__leaf), \"b\" (__d[0]), \"c\" (__d[1]), \"d\" (__d[2])\n" |
| 43719 | " : \"cc\");\n" |
| 43720 | " return __result;\n" |
| 43721 | "}\n" |
| 43722 | "\n" |
| 43723 | "#undef __DEFAULT_FN_ATTRS\n" |
| 43724 | "\n" |
| 43725 | "#endif\n" |
| 43726 | "" } , |
| 43727 | { "/builtins/shaintrin.h" , "/*===---- shaintrin.h - SHA intrinsics -------------------------------------===\n" |
| 43728 | " *\n" |
| 43729 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 43730 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 43731 | " * in the Software without restriction, including without limitation the rights\n" |
| 43732 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 43733 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 43734 | " * furnished to do so, subject to the following conditions:\n" |
| 43735 | " *\n" |
| 43736 | " * The above copyright notice and this permission notice shall be included in\n" |
| 43737 | " * all copies or substantial portions of the Software.\n" |
| 43738 | " *\n" |
| 43739 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 43740 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 43741 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 43742 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 43743 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 43744 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 43745 | " * THE SOFTWARE.\n" |
| 43746 | " *\n" |
| 43747 | " *===-----------------------------------------------------------------------===\n" |
| 43748 | " */\n" |
| 43749 | "\n" |
| 43750 | "#ifndef __IMMINTRIN_H\n" |
| 43751 | "#error \"Never use <shaintrin.h> directly; include <immintrin.h> instead.\"\n" |
| 43752 | "#endif\n" |
| 43753 | "\n" |
| 43754 | "#ifndef __SHAINTRIN_H\n" |
| 43755 | "#define __SHAINTRIN_H\n" |
| 43756 | "\n" |
| 43757 | "/* Define the default attributes for the functions in this file. */\n" |
| 43758 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"sha\"), __min_vector_width__(128)))\n" |
| 43759 | "\n" |
| 43760 | "#define _mm_sha1rnds4_epu32(V1, V2, M) \\\n" |
| 43761 | " __builtin_ia32_sha1rnds4((__v4si)(__m128i)(V1), (__v4si)(__m128i)(V2), (M))\n" |
| 43762 | "\n" |
| 43763 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 43764 | "_mm_sha1nexte_epu32(__m128i __X, __m128i __Y)\n" |
| 43765 | "{\n" |
| 43766 | " return (__m128i)__builtin_ia32_sha1nexte((__v4si)__X, (__v4si)__Y);\n" |
| 43767 | "}\n" |
| 43768 | "\n" |
| 43769 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 43770 | "_mm_sha1msg1_epu32(__m128i __X, __m128i __Y)\n" |
| 43771 | "{\n" |
| 43772 | " return (__m128i)__builtin_ia32_sha1msg1((__v4si)__X, (__v4si)__Y);\n" |
| 43773 | "}\n" |
| 43774 | "\n" |
| 43775 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 43776 | "_mm_sha1msg2_epu32(__m128i __X, __m128i __Y)\n" |
| 43777 | "{\n" |
| 43778 | " return (__m128i)__builtin_ia32_sha1msg2((__v4si)__X, (__v4si)__Y);\n" |
| 43779 | "}\n" |
| 43780 | "\n" |
| 43781 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 43782 | "_mm_sha256rnds2_epu32(__m128i __X, __m128i __Y, __m128i __Z)\n" |
| 43783 | "{\n" |
| 43784 | " return (__m128i)__builtin_ia32_sha256rnds2((__v4si)__X, (__v4si)__Y, (__v4si)__Z);\n" |
| 43785 | "}\n" |
| 43786 | "\n" |
| 43787 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 43788 | "_mm_sha256msg1_epu32(__m128i __X, __m128i __Y)\n" |
| 43789 | "{\n" |
| 43790 | " return (__m128i)__builtin_ia32_sha256msg1((__v4si)__X, (__v4si)__Y);\n" |
| 43791 | "}\n" |
| 43792 | "\n" |
| 43793 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 43794 | "_mm_sha256msg2_epu32(__m128i __X, __m128i __Y)\n" |
| 43795 | "{\n" |
| 43796 | " return (__m128i)__builtin_ia32_sha256msg2((__v4si)__X, (__v4si)__Y);\n" |
| 43797 | "}\n" |
| 43798 | "\n" |
| 43799 | "#undef __DEFAULT_FN_ATTRS\n" |
| 43800 | "\n" |
| 43801 | "#endif /* __SHAINTRIN_H */\n" |
| 43802 | "" } , |
| 43803 | { "/builtins/smmintrin.h" , "/*===---- smmintrin.h - SSE4 intrinsics ------------------------------------===\n" |
| 43804 | " *\n" |
| 43805 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 43806 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 43807 | " * in the Software without restriction, including without limitation the rights\n" |
| 43808 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 43809 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 43810 | " * furnished to do so, subject to the following conditions:\n" |
| 43811 | " *\n" |
| 43812 | " * The above copyright notice and this permission notice shall be included in\n" |
| 43813 | " * all copies or substantial portions of the Software.\n" |
| 43814 | " *\n" |
| 43815 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 43816 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 43817 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 43818 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 43819 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 43820 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 43821 | " * THE SOFTWARE.\n" |
| 43822 | " *\n" |
| 43823 | " *===-----------------------------------------------------------------------===\n" |
| 43824 | " */\n" |
| 43825 | "\n" |
| 43826 | "#ifndef __SMMINTRIN_H\n" |
| 43827 | "#define __SMMINTRIN_H\n" |
| 43828 | "\n" |
| 43829 | "#include <tmmintrin.h>\n" |
| 43830 | "\n" |
| 43831 | "/* Define the default attributes for the functions in this file. */\n" |
| 43832 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"sse4.1\"), __min_vector_width__(128)))\n" |
| 43833 | "\n" |
| 43834 | "/* SSE4 Rounding macros. */\n" |
| 43835 | "#define _MM_FROUND_TO_NEAREST_INT 0x00\n" |
| 43836 | "#define _MM_FROUND_TO_NEG_INF 0x01\n" |
| 43837 | "#define _MM_FROUND_TO_POS_INF 0x02\n" |
| 43838 | "#define _MM_FROUND_TO_ZERO 0x03\n" |
| 43839 | "#define _MM_FROUND_CUR_DIRECTION 0x04\n" |
| 43840 | "\n" |
| 43841 | "#define _MM_FROUND_RAISE_EXC 0x00\n" |
| 43842 | "#define _MM_FROUND_NO_EXC 0x08\n" |
| 43843 | "\n" |
| 43844 | "#define _MM_FROUND_NINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEAREST_INT)\n" |
| 43845 | "#define _MM_FROUND_FLOOR (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF)\n" |
| 43846 | "#define _MM_FROUND_CEIL (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF)\n" |
| 43847 | "#define _MM_FROUND_TRUNC (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO)\n" |
| 43848 | "#define _MM_FROUND_RINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION)\n" |
| 43849 | "#define _MM_FROUND_NEARBYINT (_MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION)\n" |
| 43850 | "\n" |
| 43851 | "/// Rounds up each element of the 128-bit vector of [4 x float] to an\n" |
| 43852 | "/// integer and returns the rounded values in a 128-bit vector of\n" |
| 43853 | "/// [4 x float].\n" |
| 43854 | "///\n" |
| 43855 | "/// \\headerfile <x86intrin.h>\n" |
| 43856 | "///\n" |
| 43857 | "/// \\code\n" |
| 43858 | "/// __m128 _mm_ceil_ps(__m128 X);\n" |
| 43859 | "/// \\endcode\n" |
| 43860 | "///\n" |
| 43861 | "/// This intrinsic corresponds to the <c> VROUNDPS / ROUNDPS </c> instruction.\n" |
| 43862 | "///\n" |
| 43863 | "/// \\param X\n" |
| 43864 | "/// A 128-bit vector of [4 x float] values to be rounded up.\n" |
| 43865 | "/// \\returns A 128-bit vector of [4 x float] containing the rounded values.\n" |
| 43866 | "#define _mm_ceil_ps(X) _mm_round_ps((X), _MM_FROUND_CEIL)\n" |
| 43867 | "\n" |
| 43868 | "/// Rounds up each element of the 128-bit vector of [2 x double] to an\n" |
| 43869 | "/// integer and returns the rounded values in a 128-bit vector of\n" |
| 43870 | "/// [2 x double].\n" |
| 43871 | "///\n" |
| 43872 | "/// \\headerfile <x86intrin.h>\n" |
| 43873 | "///\n" |
| 43874 | "/// \\code\n" |
| 43875 | "/// __m128d _mm_ceil_pd(__m128d X);\n" |
| 43876 | "/// \\endcode\n" |
| 43877 | "///\n" |
| 43878 | "/// This intrinsic corresponds to the <c> VROUNDPD / ROUNDPD </c> instruction.\n" |
| 43879 | "///\n" |
| 43880 | "/// \\param X\n" |
| 43881 | "/// A 128-bit vector of [2 x double] values to be rounded up.\n" |
| 43882 | "/// \\returns A 128-bit vector of [2 x double] containing the rounded values.\n" |
| 43883 | "#define _mm_ceil_pd(X) _mm_round_pd((X), _MM_FROUND_CEIL)\n" |
| 43884 | "\n" |
| 43885 | "/// Copies three upper elements of the first 128-bit vector operand to\n" |
| 43886 | "/// the corresponding three upper elements of the 128-bit result vector of\n" |
| 43887 | "/// [4 x float]. Rounds up the lowest element of the second 128-bit vector\n" |
| 43888 | "/// operand to an integer and copies it to the lowest element of the 128-bit\n" |
| 43889 | "/// result vector of [4 x float].\n" |
| 43890 | "///\n" |
| 43891 | "/// \\headerfile <x86intrin.h>\n" |
| 43892 | "///\n" |
| 43893 | "/// \\code\n" |
| 43894 | "/// __m128 _mm_ceil_ss(__m128 X, __m128 Y);\n" |
| 43895 | "/// \\endcode\n" |
| 43896 | "///\n" |
| 43897 | "/// This intrinsic corresponds to the <c> VROUNDSS / ROUNDSS </c> instruction.\n" |
| 43898 | "///\n" |
| 43899 | "/// \\param X\n" |
| 43900 | "/// A 128-bit vector of [4 x float]. The values stored in bits [127:32] are\n" |
| 43901 | "/// copied to the corresponding bits of the result.\n" |
| 43902 | "/// \\param Y\n" |
| 43903 | "/// A 128-bit vector of [4 x float]. The value stored in bits [31:0] is\n" |
| 43904 | "/// rounded up to the nearest integer and copied to the corresponding bits\n" |
| 43905 | "/// of the result.\n" |
| 43906 | "/// \\returns A 128-bit vector of [4 x float] containing the copied and rounded\n" |
| 43907 | "/// values.\n" |
| 43908 | "#define _mm_ceil_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_CEIL)\n" |
| 43909 | "\n" |
| 43910 | "/// Copies the upper element of the first 128-bit vector operand to the\n" |
| 43911 | "/// corresponding upper element of the 128-bit result vector of [2 x double].\n" |
| 43912 | "/// Rounds up the lower element of the second 128-bit vector operand to an\n" |
| 43913 | "/// integer and copies it to the lower element of the 128-bit result vector\n" |
| 43914 | "/// of [2 x double].\n" |
| 43915 | "///\n" |
| 43916 | "/// \\headerfile <x86intrin.h>\n" |
| 43917 | "///\n" |
| 43918 | "/// \\code\n" |
| 43919 | "/// __m128d _mm_ceil_sd(__m128d X, __m128d Y);\n" |
| 43920 | "/// \\endcode\n" |
| 43921 | "///\n" |
| 43922 | "/// This intrinsic corresponds to the <c> VROUNDSD / ROUNDSD </c> instruction.\n" |
| 43923 | "///\n" |
| 43924 | "/// \\param X\n" |
| 43925 | "/// A 128-bit vector of [2 x double]. The value stored in bits [127:64] is\n" |
| 43926 | "/// copied to the corresponding bits of the result.\n" |
| 43927 | "/// \\param Y\n" |
| 43928 | "/// A 128-bit vector of [2 x double]. The value stored in bits [63:0] is\n" |
| 43929 | "/// rounded up to the nearest integer and copied to the corresponding bits\n" |
| 43930 | "/// of the result.\n" |
| 43931 | "/// \\returns A 128-bit vector of [2 x double] containing the copied and rounded\n" |
| 43932 | "/// values.\n" |
| 43933 | "#define _mm_ceil_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_CEIL)\n" |
| 43934 | "\n" |
| 43935 | "/// Rounds down each element of the 128-bit vector of [4 x float] to an\n" |
| 43936 | "/// an integer and returns the rounded values in a 128-bit vector of\n" |
| 43937 | "/// [4 x float].\n" |
| 43938 | "///\n" |
| 43939 | "/// \\headerfile <x86intrin.h>\n" |
| 43940 | "///\n" |
| 43941 | "/// \\code\n" |
| 43942 | "/// __m128 _mm_floor_ps(__m128 X);\n" |
| 43943 | "/// \\endcode\n" |
| 43944 | "///\n" |
| 43945 | "/// This intrinsic corresponds to the <c> VROUNDPS / ROUNDPS </c> instruction.\n" |
| 43946 | "///\n" |
| 43947 | "/// \\param X\n" |
| 43948 | "/// A 128-bit vector of [4 x float] values to be rounded down.\n" |
| 43949 | "/// \\returns A 128-bit vector of [4 x float] containing the rounded values.\n" |
| 43950 | "#define _mm_floor_ps(X) _mm_round_ps((X), _MM_FROUND_FLOOR)\n" |
| 43951 | "\n" |
| 43952 | "/// Rounds down each element of the 128-bit vector of [2 x double] to an\n" |
| 43953 | "/// integer and returns the rounded values in a 128-bit vector of\n" |
| 43954 | "/// [2 x double].\n" |
| 43955 | "///\n" |
| 43956 | "/// \\headerfile <x86intrin.h>\n" |
| 43957 | "///\n" |
| 43958 | "/// \\code\n" |
| 43959 | "/// __m128d _mm_floor_pd(__m128d X);\n" |
| 43960 | "/// \\endcode\n" |
| 43961 | "///\n" |
| 43962 | "/// This intrinsic corresponds to the <c> VROUNDPD / ROUNDPD </c> instruction.\n" |
| 43963 | "///\n" |
| 43964 | "/// \\param X\n" |
| 43965 | "/// A 128-bit vector of [2 x double].\n" |
| 43966 | "/// \\returns A 128-bit vector of [2 x double] containing the rounded values.\n" |
| 43967 | "#define _mm_floor_pd(X) _mm_round_pd((X), _MM_FROUND_FLOOR)\n" |
| 43968 | "\n" |
| 43969 | "/// Copies three upper elements of the first 128-bit vector operand to\n" |
| 43970 | "/// the corresponding three upper elements of the 128-bit result vector of\n" |
| 43971 | "/// [4 x float]. Rounds down the lowest element of the second 128-bit vector\n" |
| 43972 | "/// operand to an integer and copies it to the lowest element of the 128-bit\n" |
| 43973 | "/// result vector of [4 x float].\n" |
| 43974 | "///\n" |
| 43975 | "/// \\headerfile <x86intrin.h>\n" |
| 43976 | "///\n" |
| 43977 | "/// \\code\n" |
| 43978 | "/// __m128 _mm_floor_ss(__m128 X, __m128 Y);\n" |
| 43979 | "/// \\endcode\n" |
| 43980 | "///\n" |
| 43981 | "/// This intrinsic corresponds to the <c> VROUNDSS / ROUNDSS </c> instruction.\n" |
| 43982 | "///\n" |
| 43983 | "/// \\param X\n" |
| 43984 | "/// A 128-bit vector of [4 x float]. The values stored in bits [127:32] are\n" |
| 43985 | "/// copied to the corresponding bits of the result.\n" |
| 43986 | "/// \\param Y\n" |
| 43987 | "/// A 128-bit vector of [4 x float]. The value stored in bits [31:0] is\n" |
| 43988 | "/// rounded down to the nearest integer and copied to the corresponding bits\n" |
| 43989 | "/// of the result.\n" |
| 43990 | "/// \\returns A 128-bit vector of [4 x float] containing the copied and rounded\n" |
| 43991 | "/// values.\n" |
| 43992 | "#define _mm_floor_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_FLOOR)\n" |
| 43993 | "\n" |
| 43994 | "/// Copies the upper element of the first 128-bit vector operand to the\n" |
| 43995 | "/// corresponding upper element of the 128-bit result vector of [2 x double].\n" |
| 43996 | "/// Rounds down the lower element of the second 128-bit vector operand to an\n" |
| 43997 | "/// integer and copies it to the lower element of the 128-bit result vector\n" |
| 43998 | "/// of [2 x double].\n" |
| 43999 | "///\n" |
| 44000 | "/// \\headerfile <x86intrin.h>\n" |
| 44001 | "///\n" |
| 44002 | "/// \\code\n" |
| 44003 | "/// __m128d _mm_floor_sd(__m128d X, __m128d Y);\n" |
| 44004 | "/// \\endcode\n" |
| 44005 | "///\n" |
| 44006 | "/// This intrinsic corresponds to the <c> VROUNDSD / ROUNDSD </c> instruction.\n" |
| 44007 | "///\n" |
| 44008 | "/// \\param X\n" |
| 44009 | "/// A 128-bit vector of [2 x double]. The value stored in bits [127:64] is\n" |
| 44010 | "/// copied to the corresponding bits of the result.\n" |
| 44011 | "/// \\param Y\n" |
| 44012 | "/// A 128-bit vector of [2 x double]. The value stored in bits [63:0] is\n" |
| 44013 | "/// rounded down to the nearest integer and copied to the corresponding bits\n" |
| 44014 | "/// of the result.\n" |
| 44015 | "/// \\returns A 128-bit vector of [2 x double] containing the copied and rounded\n" |
| 44016 | "/// values.\n" |
| 44017 | "#define _mm_floor_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_FLOOR)\n" |
| 44018 | "\n" |
| 44019 | "/// Rounds each element of the 128-bit vector of [4 x float] to an\n" |
| 44020 | "/// integer value according to the rounding control specified by the second\n" |
| 44021 | "/// argument and returns the rounded values in a 128-bit vector of\n" |
| 44022 | "/// [4 x float].\n" |
| 44023 | "///\n" |
| 44024 | "/// \\headerfile <x86intrin.h>\n" |
| 44025 | "///\n" |
| 44026 | "/// \\code\n" |
| 44027 | "/// __m128 _mm_round_ps(__m128 X, const int M);\n" |
| 44028 | "/// \\endcode\n" |
| 44029 | "///\n" |
| 44030 | "/// This intrinsic corresponds to the <c> VROUNDPS / ROUNDPS </c> instruction.\n" |
| 44031 | "///\n" |
| 44032 | "/// \\param X\n" |
| 44033 | "/// A 128-bit vector of [4 x float].\n" |
| 44034 | "/// \\param M\n" |
| 44035 | "/// An integer value that specifies the rounding operation. \\n\n" |
| 44036 | "/// Bits [7:4] are reserved. \\n\n" |
| 44037 | "/// Bit [3] is a precision exception value: \\n\n" |
| 44038 | "/// 0: A normal PE exception is used \\n\n" |
| 44039 | "/// 1: The PE field is not updated \\n\n" |
| 44040 | "/// Bit [2] is the rounding control source: \\n\n" |
| 44041 | "/// 0: Use bits [1:0] of \\a M \\n\n" |
| 44042 | "/// 1: Use the current MXCSR setting \\n\n" |
| 44043 | "/// Bits [1:0] contain the rounding control definition: \\n\n" |
| 44044 | "/// 00: Nearest \\n\n" |
| 44045 | "/// 01: Downward (toward negative infinity) \\n\n" |
| 44046 | "/// 10: Upward (toward positive infinity) \\n\n" |
| 44047 | "/// 11: Truncated\n" |
| 44048 | "/// \\returns A 128-bit vector of [4 x float] containing the rounded values.\n" |
| 44049 | "#define _mm_round_ps(X, M) \\\n" |
| 44050 | " (__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M))\n" |
| 44051 | "\n" |
| 44052 | "/// Copies three upper elements of the first 128-bit vector operand to\n" |
| 44053 | "/// the corresponding three upper elements of the 128-bit result vector of\n" |
| 44054 | "/// [4 x float]. Rounds the lowest element of the second 128-bit vector\n" |
| 44055 | "/// operand to an integer value according to the rounding control specified\n" |
| 44056 | "/// by the third argument and copies it to the lowest element of the 128-bit\n" |
| 44057 | "/// result vector of [4 x float].\n" |
| 44058 | "///\n" |
| 44059 | "/// \\headerfile <x86intrin.h>\n" |
| 44060 | "///\n" |
| 44061 | "/// \\code\n" |
| 44062 | "/// __m128 _mm_round_ss(__m128 X, __m128 Y, const int M);\n" |
| 44063 | "/// \\endcode\n" |
| 44064 | "///\n" |
| 44065 | "/// This intrinsic corresponds to the <c> VROUNDSS / ROUNDSS </c> instruction.\n" |
| 44066 | "///\n" |
| 44067 | "/// \\param X\n" |
| 44068 | "/// A 128-bit vector of [4 x float]. The values stored in bits [127:32] are\n" |
| 44069 | "/// copied to the corresponding bits of the result.\n" |
| 44070 | "/// \\param Y\n" |
| 44071 | "/// A 128-bit vector of [4 x float]. The value stored in bits [31:0] is\n" |
| 44072 | "/// rounded to the nearest integer using the specified rounding control and\n" |
| 44073 | "/// copied to the corresponding bits of the result.\n" |
| 44074 | "/// \\param M\n" |
| 44075 | "/// An integer value that specifies the rounding operation. \\n\n" |
| 44076 | "/// Bits [7:4] are reserved. \\n\n" |
| 44077 | "/// Bit [3] is a precision exception value: \\n\n" |
| 44078 | "/// 0: A normal PE exception is used \\n\n" |
| 44079 | "/// 1: The PE field is not updated \\n\n" |
| 44080 | "/// Bit [2] is the rounding control source: \\n\n" |
| 44081 | "/// 0: Use bits [1:0] of \\a M \\n\n" |
| 44082 | "/// 1: Use the current MXCSR setting \\n\n" |
| 44083 | "/// Bits [1:0] contain the rounding control definition: \\n\n" |
| 44084 | "/// 00: Nearest \\n\n" |
| 44085 | "/// 01: Downward (toward negative infinity) \\n\n" |
| 44086 | "/// 10: Upward (toward positive infinity) \\n\n" |
| 44087 | "/// 11: Truncated\n" |
| 44088 | "/// \\returns A 128-bit vector of [4 x float] containing the copied and rounded\n" |
| 44089 | "/// values.\n" |
| 44090 | "#define _mm_round_ss(X, Y, M) \\\n" |
| 44091 | " (__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \\\n" |
| 44092 | " (__v4sf)(__m128)(Y), (M))\n" |
| 44093 | "\n" |
| 44094 | "/// Rounds each element of the 128-bit vector of [2 x double] to an\n" |
| 44095 | "/// integer value according to the rounding control specified by the second\n" |
| 44096 | "/// argument and returns the rounded values in a 128-bit vector of\n" |
| 44097 | "/// [2 x double].\n" |
| 44098 | "///\n" |
| 44099 | "/// \\headerfile <x86intrin.h>\n" |
| 44100 | "///\n" |
| 44101 | "/// \\code\n" |
| 44102 | "/// __m128d _mm_round_pd(__m128d X, const int M);\n" |
| 44103 | "/// \\endcode\n" |
| 44104 | "///\n" |
| 44105 | "/// This intrinsic corresponds to the <c> VROUNDPD / ROUNDPD </c> instruction.\n" |
| 44106 | "///\n" |
| 44107 | "/// \\param X\n" |
| 44108 | "/// A 128-bit vector of [2 x double].\n" |
| 44109 | "/// \\param M\n" |
| 44110 | "/// An integer value that specifies the rounding operation. \\n\n" |
| 44111 | "/// Bits [7:4] are reserved. \\n\n" |
| 44112 | "/// Bit [3] is a precision exception value: \\n\n" |
| 44113 | "/// 0: A normal PE exception is used \\n\n" |
| 44114 | "/// 1: The PE field is not updated \\n\n" |
| 44115 | "/// Bit [2] is the rounding control source: \\n\n" |
| 44116 | "/// 0: Use bits [1:0] of \\a M \\n\n" |
| 44117 | "/// 1: Use the current MXCSR setting \\n\n" |
| 44118 | "/// Bits [1:0] contain the rounding control definition: \\n\n" |
| 44119 | "/// 00: Nearest \\n\n" |
| 44120 | "/// 01: Downward (toward negative infinity) \\n\n" |
| 44121 | "/// 10: Upward (toward positive infinity) \\n\n" |
| 44122 | "/// 11: Truncated\n" |
| 44123 | "/// \\returns A 128-bit vector of [2 x double] containing the rounded values.\n" |
| 44124 | "#define _mm_round_pd(X, M) \\\n" |
| 44125 | " (__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M))\n" |
| 44126 | "\n" |
| 44127 | "/// Copies the upper element of the first 128-bit vector operand to the\n" |
| 44128 | "/// corresponding upper element of the 128-bit result vector of [2 x double].\n" |
| 44129 | "/// Rounds the lower element of the second 128-bit vector operand to an\n" |
| 44130 | "/// integer value according to the rounding control specified by the third\n" |
| 44131 | "/// argument and copies it to the lower element of the 128-bit result vector\n" |
| 44132 | "/// of [2 x double].\n" |
| 44133 | "///\n" |
| 44134 | "/// \\headerfile <x86intrin.h>\n" |
| 44135 | "///\n" |
| 44136 | "/// \\code\n" |
| 44137 | "/// __m128d _mm_round_sd(__m128d X, __m128d Y, const int M);\n" |
| 44138 | "/// \\endcode\n" |
| 44139 | "///\n" |
| 44140 | "/// This intrinsic corresponds to the <c> VROUNDSD / ROUNDSD </c> instruction.\n" |
| 44141 | "///\n" |
| 44142 | "/// \\param X\n" |
| 44143 | "/// A 128-bit vector of [2 x double]. The value stored in bits [127:64] is\n" |
| 44144 | "/// copied to the corresponding bits of the result.\n" |
| 44145 | "/// \\param Y\n" |
| 44146 | "/// A 128-bit vector of [2 x double]. The value stored in bits [63:0] is\n" |
| 44147 | "/// rounded to the nearest integer using the specified rounding control and\n" |
| 44148 | "/// copied to the corresponding bits of the result.\n" |
| 44149 | "/// \\param M\n" |
| 44150 | "/// An integer value that specifies the rounding operation. \\n\n" |
| 44151 | "/// Bits [7:4] are reserved. \\n\n" |
| 44152 | "/// Bit [3] is a precision exception value: \\n\n" |
| 44153 | "/// 0: A normal PE exception is used \\n\n" |
| 44154 | "/// 1: The PE field is not updated \\n\n" |
| 44155 | "/// Bit [2] is the rounding control source: \\n\n" |
| 44156 | "/// 0: Use bits [1:0] of \\a M \\n\n" |
| 44157 | "/// 1: Use the current MXCSR setting \\n\n" |
| 44158 | "/// Bits [1:0] contain the rounding control definition: \\n\n" |
| 44159 | "/// 00: Nearest \\n\n" |
| 44160 | "/// 01: Downward (toward negative infinity) \\n\n" |
| 44161 | "/// 10: Upward (toward positive infinity) \\n\n" |
| 44162 | "/// 11: Truncated\n" |
| 44163 | "/// \\returns A 128-bit vector of [2 x double] containing the copied and rounded\n" |
| 44164 | "/// values.\n" |
| 44165 | "#define _mm_round_sd(X, Y, M) \\\n" |
| 44166 | " (__m128d)__builtin_ia32_roundsd((__v2df)(__m128d)(X), \\\n" |
| 44167 | " (__v2df)(__m128d)(Y), (M))\n" |
| 44168 | "\n" |
| 44169 | "/* SSE4 Packed Blending Intrinsics. */\n" |
| 44170 | "/// Returns a 128-bit vector of [2 x double] where the values are\n" |
| 44171 | "/// selected from either the first or second operand as specified by the\n" |
| 44172 | "/// third operand, the control mask.\n" |
| 44173 | "///\n" |
| 44174 | "/// \\headerfile <x86intrin.h>\n" |
| 44175 | "///\n" |
| 44176 | "/// \\code\n" |
| 44177 | "/// __m128d _mm_blend_pd(__m128d V1, __m128d V2, const int M);\n" |
| 44178 | "/// \\endcode\n" |
| 44179 | "///\n" |
| 44180 | "/// This intrinsic corresponds to the <c> VBLENDPD / BLENDPD </c> instruction.\n" |
| 44181 | "///\n" |
| 44182 | "/// \\param V1\n" |
| 44183 | "/// A 128-bit vector of [2 x double].\n" |
| 44184 | "/// \\param V2\n" |
| 44185 | "/// A 128-bit vector of [2 x double].\n" |
| 44186 | "/// \\param M\n" |
| 44187 | "/// An immediate integer operand, with mask bits [1:0] specifying how the\n" |
| 44188 | "/// values are to be copied. The position of the mask bit corresponds to the\n" |
| 44189 | "/// index of a copied value. When a mask bit is 0, the corresponding 64-bit\n" |
| 44190 | "/// element in operand \\a V1 is copied to the same position in the result.\n" |
| 44191 | "/// When a mask bit is 1, the corresponding 64-bit element in operand \\a V2\n" |
| 44192 | "/// is copied to the same position in the result.\n" |
| 44193 | "/// \\returns A 128-bit vector of [2 x double] containing the copied values.\n" |
| 44194 | "#define _mm_blend_pd(V1, V2, M) \\\n" |
| 44195 | " (__m128d) __builtin_ia32_blendpd ((__v2df)(__m128d)(V1), \\\n" |
| 44196 | " (__v2df)(__m128d)(V2), (int)(M))\n" |
| 44197 | "\n" |
| 44198 | "/// Returns a 128-bit vector of [4 x float] where the values are selected\n" |
| 44199 | "/// from either the first or second operand as specified by the third\n" |
| 44200 | "/// operand, the control mask.\n" |
| 44201 | "///\n" |
| 44202 | "/// \\headerfile <x86intrin.h>\n" |
| 44203 | "///\n" |
| 44204 | "/// \\code\n" |
| 44205 | "/// __m128 _mm_blend_ps(__m128 V1, __m128 V2, const int M);\n" |
| 44206 | "/// \\endcode\n" |
| 44207 | "///\n" |
| 44208 | "/// This intrinsic corresponds to the <c> VBLENDPS / BLENDPS </c> instruction.\n" |
| 44209 | "///\n" |
| 44210 | "/// \\param V1\n" |
| 44211 | "/// A 128-bit vector of [4 x float].\n" |
| 44212 | "/// \\param V2\n" |
| 44213 | "/// A 128-bit vector of [4 x float].\n" |
| 44214 | "/// \\param M\n" |
| 44215 | "/// An immediate integer operand, with mask bits [3:0] specifying how the\n" |
| 44216 | "/// values are to be copied. The position of the mask bit corresponds to the\n" |
| 44217 | "/// index of a copied value. When a mask bit is 0, the corresponding 32-bit\n" |
| 44218 | "/// element in operand \\a V1 is copied to the same position in the result.\n" |
| 44219 | "/// When a mask bit is 1, the corresponding 32-bit element in operand \\a V2\n" |
| 44220 | "/// is copied to the same position in the result.\n" |
| 44221 | "/// \\returns A 128-bit vector of [4 x float] containing the copied values.\n" |
| 44222 | "#define _mm_blend_ps(V1, V2, M) \\\n" |
| 44223 | " (__m128) __builtin_ia32_blendps ((__v4sf)(__m128)(V1), \\\n" |
| 44224 | " (__v4sf)(__m128)(V2), (int)(M))\n" |
| 44225 | "\n" |
| 44226 | "/// Returns a 128-bit vector of [2 x double] where the values are\n" |
| 44227 | "/// selected from either the first or second operand as specified by the\n" |
| 44228 | "/// third operand, the control mask.\n" |
| 44229 | "///\n" |
| 44230 | "/// \\headerfile <x86intrin.h>\n" |
| 44231 | "///\n" |
| 44232 | "/// This intrinsic corresponds to the <c> VBLENDVPD / BLENDVPD </c> instruction.\n" |
| 44233 | "///\n" |
| 44234 | "/// \\param __V1\n" |
| 44235 | "/// A 128-bit vector of [2 x double].\n" |
| 44236 | "/// \\param __V2\n" |
| 44237 | "/// A 128-bit vector of [2 x double].\n" |
| 44238 | "/// \\param __M\n" |
| 44239 | "/// A 128-bit vector operand, with mask bits 127 and 63 specifying how the\n" |
| 44240 | "/// values are to be copied. The position of the mask bit corresponds to the\n" |
| 44241 | "/// most significant bit of a copied value. When a mask bit is 0, the\n" |
| 44242 | "/// corresponding 64-bit element in operand \\a __V1 is copied to the same\n" |
| 44243 | "/// position in the result. When a mask bit is 1, the corresponding 64-bit\n" |
| 44244 | "/// element in operand \\a __V2 is copied to the same position in the result.\n" |
| 44245 | "/// \\returns A 128-bit vector of [2 x double] containing the copied values.\n" |
| 44246 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 44247 | "_mm_blendv_pd (__m128d __V1, __m128d __V2, __m128d __M)\n" |
| 44248 | "{\n" |
| 44249 | " return (__m128d) __builtin_ia32_blendvpd ((__v2df)__V1, (__v2df)__V2,\n" |
| 44250 | " (__v2df)__M);\n" |
| 44251 | "}\n" |
| 44252 | "\n" |
| 44253 | "/// Returns a 128-bit vector of [4 x float] where the values are\n" |
| 44254 | "/// selected from either the first or second operand as specified by the\n" |
| 44255 | "/// third operand, the control mask.\n" |
| 44256 | "///\n" |
| 44257 | "/// \\headerfile <x86intrin.h>\n" |
| 44258 | "///\n" |
| 44259 | "/// This intrinsic corresponds to the <c> VBLENDVPS / BLENDVPS </c> instruction.\n" |
| 44260 | "///\n" |
| 44261 | "/// \\param __V1\n" |
| 44262 | "/// A 128-bit vector of [4 x float].\n" |
| 44263 | "/// \\param __V2\n" |
| 44264 | "/// A 128-bit vector of [4 x float].\n" |
| 44265 | "/// \\param __M\n" |
| 44266 | "/// A 128-bit vector operand, with mask bits 127, 95, 63, and 31 specifying\n" |
| 44267 | "/// how the values are to be copied. The position of the mask bit corresponds\n" |
| 44268 | "/// to the most significant bit of a copied value. When a mask bit is 0, the\n" |
| 44269 | "/// corresponding 32-bit element in operand \\a __V1 is copied to the same\n" |
| 44270 | "/// position in the result. When a mask bit is 1, the corresponding 32-bit\n" |
| 44271 | "/// element in operand \\a __V2 is copied to the same position in the result.\n" |
| 44272 | "/// \\returns A 128-bit vector of [4 x float] containing the copied values.\n" |
| 44273 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 44274 | "_mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M)\n" |
| 44275 | "{\n" |
| 44276 | " return (__m128) __builtin_ia32_blendvps ((__v4sf)__V1, (__v4sf)__V2,\n" |
| 44277 | " (__v4sf)__M);\n" |
| 44278 | "}\n" |
| 44279 | "\n" |
| 44280 | "/// Returns a 128-bit vector of [16 x i8] where the values are selected\n" |
| 44281 | "/// from either of the first or second operand as specified by the third\n" |
| 44282 | "/// operand, the control mask.\n" |
| 44283 | "///\n" |
| 44284 | "/// \\headerfile <x86intrin.h>\n" |
| 44285 | "///\n" |
| 44286 | "/// This intrinsic corresponds to the <c> VPBLENDVB / PBLENDVB </c> instruction.\n" |
| 44287 | "///\n" |
| 44288 | "/// \\param __V1\n" |
| 44289 | "/// A 128-bit vector of [16 x i8].\n" |
| 44290 | "/// \\param __V2\n" |
| 44291 | "/// A 128-bit vector of [16 x i8].\n" |
| 44292 | "/// \\param __M\n" |
| 44293 | "/// A 128-bit vector operand, with mask bits 127, 119, 111...7 specifying\n" |
| 44294 | "/// how the values are to be copied. The position of the mask bit corresponds\n" |
| 44295 | "/// to the most significant bit of a copied value. When a mask bit is 0, the\n" |
| 44296 | "/// corresponding 8-bit element in operand \\a __V1 is copied to the same\n" |
| 44297 | "/// position in the result. When a mask bit is 1, the corresponding 8-bit\n" |
| 44298 | "/// element in operand \\a __V2 is copied to the same position in the result.\n" |
| 44299 | "/// \\returns A 128-bit vector of [16 x i8] containing the copied values.\n" |
| 44300 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 44301 | "_mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M)\n" |
| 44302 | "{\n" |
| 44303 | " return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__V1, (__v16qi)__V2,\n" |
| 44304 | " (__v16qi)__M);\n" |
| 44305 | "}\n" |
| 44306 | "\n" |
| 44307 | "/// Returns a 128-bit vector of [8 x i16] where the values are selected\n" |
| 44308 | "/// from either of the first or second operand as specified by the third\n" |
| 44309 | "/// operand, the control mask.\n" |
| 44310 | "///\n" |
| 44311 | "/// \\headerfile <x86intrin.h>\n" |
| 44312 | "///\n" |
| 44313 | "/// \\code\n" |
| 44314 | "/// __m128i _mm_blend_epi16(__m128i V1, __m128i V2, const int M);\n" |
| 44315 | "/// \\endcode\n" |
| 44316 | "///\n" |
| 44317 | "/// This intrinsic corresponds to the <c> VPBLENDW / PBLENDW </c> instruction.\n" |
| 44318 | "///\n" |
| 44319 | "/// \\param V1\n" |
| 44320 | "/// A 128-bit vector of [8 x i16].\n" |
| 44321 | "/// \\param V2\n" |
| 44322 | "/// A 128-bit vector of [8 x i16].\n" |
| 44323 | "/// \\param M\n" |
| 44324 | "/// An immediate integer operand, with mask bits [7:0] specifying how the\n" |
| 44325 | "/// values are to be copied. The position of the mask bit corresponds to the\n" |
| 44326 | "/// index of a copied value. When a mask bit is 0, the corresponding 16-bit\n" |
| 44327 | "/// element in operand \\a V1 is copied to the same position in the result.\n" |
| 44328 | "/// When a mask bit is 1, the corresponding 16-bit element in operand \\a V2\n" |
| 44329 | "/// is copied to the same position in the result.\n" |
| 44330 | "/// \\returns A 128-bit vector of [8 x i16] containing the copied values.\n" |
| 44331 | "#define _mm_blend_epi16(V1, V2, M) \\\n" |
| 44332 | " (__m128i) __builtin_ia32_pblendw128 ((__v8hi)(__m128i)(V1), \\\n" |
| 44333 | " (__v8hi)(__m128i)(V2), (int)(M))\n" |
| 44334 | "\n" |
| 44335 | "/* SSE4 Dword Multiply Instructions. */\n" |
| 44336 | "/// Multiples corresponding elements of two 128-bit vectors of [4 x i32]\n" |
| 44337 | "/// and returns the lower 32 bits of the each product in a 128-bit vector of\n" |
| 44338 | "/// [4 x i32].\n" |
| 44339 | "///\n" |
| 44340 | "/// \\headerfile <x86intrin.h>\n" |
| 44341 | "///\n" |
| 44342 | "/// This intrinsic corresponds to the <c> VPMULLD / PMULLD </c> instruction.\n" |
| 44343 | "///\n" |
| 44344 | "/// \\param __V1\n" |
| 44345 | "/// A 128-bit integer vector.\n" |
| 44346 | "/// \\param __V2\n" |
| 44347 | "/// A 128-bit integer vector.\n" |
| 44348 | "/// \\returns A 128-bit integer vector containing the products of both operands.\n" |
| 44349 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 44350 | "_mm_mullo_epi32 (__m128i __V1, __m128i __V2)\n" |
| 44351 | "{\n" |
| 44352 | " return (__m128i) ((__v4su)__V1 * (__v4su)__V2);\n" |
| 44353 | "}\n" |
| 44354 | "\n" |
| 44355 | "/// Multiplies corresponding even-indexed elements of two 128-bit\n" |
| 44356 | "/// vectors of [4 x i32] and returns a 128-bit vector of [2 x i64]\n" |
| 44357 | "/// containing the products.\n" |
| 44358 | "///\n" |
| 44359 | "/// \\headerfile <x86intrin.h>\n" |
| 44360 | "///\n" |
| 44361 | "/// This intrinsic corresponds to the <c> VPMULDQ / PMULDQ </c> instruction.\n" |
| 44362 | "///\n" |
| 44363 | "/// \\param __V1\n" |
| 44364 | "/// A 128-bit vector of [4 x i32].\n" |
| 44365 | "/// \\param __V2\n" |
| 44366 | "/// A 128-bit vector of [4 x i32].\n" |
| 44367 | "/// \\returns A 128-bit vector of [2 x i64] containing the products of both\n" |
| 44368 | "/// operands.\n" |
| 44369 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 44370 | "_mm_mul_epi32 (__m128i __V1, __m128i __V2)\n" |
| 44371 | "{\n" |
| 44372 | " return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__V1, (__v4si)__V2);\n" |
| 44373 | "}\n" |
| 44374 | "\n" |
| 44375 | "/* SSE4 Floating Point Dot Product Instructions. */\n" |
| 44376 | "/// Computes the dot product of the two 128-bit vectors of [4 x float]\n" |
| 44377 | "/// and returns it in the elements of the 128-bit result vector of\n" |
| 44378 | "/// [4 x float].\n" |
| 44379 | "///\n" |
| 44380 | "/// The immediate integer operand controls which input elements\n" |
| 44381 | "/// will contribute to the dot product, and where the final results are\n" |
| 44382 | "/// returned.\n" |
| 44383 | "///\n" |
| 44384 | "/// \\headerfile <x86intrin.h>\n" |
| 44385 | "///\n" |
| 44386 | "/// \\code\n" |
| 44387 | "/// __m128 _mm_dp_ps(__m128 X, __m128 Y, const int M);\n" |
| 44388 | "/// \\endcode\n" |
| 44389 | "///\n" |
| 44390 | "/// This intrinsic corresponds to the <c> VDPPS / DPPS </c> instruction.\n" |
| 44391 | "///\n" |
| 44392 | "/// \\param X\n" |
| 44393 | "/// A 128-bit vector of [4 x float].\n" |
| 44394 | "/// \\param Y\n" |
| 44395 | "/// A 128-bit vector of [4 x float].\n" |
| 44396 | "/// \\param M\n" |
| 44397 | "/// An immediate integer operand. Mask bits [7:4] determine which elements\n" |
| 44398 | "/// of the input vectors are used, with bit [4] corresponding to the lowest\n" |
| 44399 | "/// element and bit [7] corresponding to the highest element of each [4 x\n" |
| 44400 | "/// float] vector. If a bit is set, the corresponding elements from the two\n" |
| 44401 | "/// input vectors are used as an input for dot product; otherwise that input\n" |
| 44402 | "/// is treated as zero. Bits [3:0] determine which elements of the result\n" |
| 44403 | "/// will receive a copy of the final dot product, with bit [0] corresponding\n" |
| 44404 | "/// to the lowest element and bit [3] corresponding to the highest element of\n" |
| 44405 | "/// each [4 x float] subvector. If a bit is set, the dot product is returned\n" |
| 44406 | "/// in the corresponding element; otherwise that element is set to zero.\n" |
| 44407 | "/// \\returns A 128-bit vector of [4 x float] containing the dot product.\n" |
| 44408 | "#define _mm_dp_ps(X, Y, M) \\\n" |
| 44409 | " (__m128) __builtin_ia32_dpps((__v4sf)(__m128)(X), \\\n" |
| 44410 | " (__v4sf)(__m128)(Y), (M))\n" |
| 44411 | "\n" |
| 44412 | "/// Computes the dot product of the two 128-bit vectors of [2 x double]\n" |
| 44413 | "/// and returns it in the elements of the 128-bit result vector of\n" |
| 44414 | "/// [2 x double].\n" |
| 44415 | "///\n" |
| 44416 | "/// The immediate integer operand controls which input\n" |
| 44417 | "/// elements will contribute to the dot product, and where the final results\n" |
| 44418 | "/// are returned.\n" |
| 44419 | "///\n" |
| 44420 | "/// \\headerfile <x86intrin.h>\n" |
| 44421 | "///\n" |
| 44422 | "/// \\code\n" |
| 44423 | "/// __m128d _mm_dp_pd(__m128d X, __m128d Y, const int M);\n" |
| 44424 | "/// \\endcode\n" |
| 44425 | "///\n" |
| 44426 | "/// This intrinsic corresponds to the <c> VDPPD / DPPD </c> instruction.\n" |
| 44427 | "///\n" |
| 44428 | "/// \\param X\n" |
| 44429 | "/// A 128-bit vector of [2 x double].\n" |
| 44430 | "/// \\param Y\n" |
| 44431 | "/// A 128-bit vector of [2 x double].\n" |
| 44432 | "/// \\param M\n" |
| 44433 | "/// An immediate integer operand. Mask bits [5:4] determine which elements\n" |
| 44434 | "/// of the input vectors are used, with bit [4] corresponding to the lowest\n" |
| 44435 | "/// element and bit [5] corresponding to the highest element of each of [2 x\n" |
| 44436 | "/// double] vector. If a bit is set, the corresponding elements from the two\n" |
| 44437 | "/// input vectors are used as an input for dot product; otherwise that input\n" |
| 44438 | "/// is treated as zero. Bits [1:0] determine which elements of the result\n" |
| 44439 | "/// will receive a copy of the final dot product, with bit [0] corresponding\n" |
| 44440 | "/// to the lowest element and bit [1] corresponding to the highest element of\n" |
| 44441 | "/// each [2 x double] vector. If a bit is set, the dot product is returned in\n" |
| 44442 | "/// the corresponding element; otherwise that element is set to zero.\n" |
| 44443 | "#define _mm_dp_pd(X, Y, M) \\\n" |
| 44444 | " (__m128d) __builtin_ia32_dppd((__v2df)(__m128d)(X), \\\n" |
| 44445 | " (__v2df)(__m128d)(Y), (M))\n" |
| 44446 | "\n" |
| 44447 | "/* SSE4 Streaming Load Hint Instruction. */\n" |
| 44448 | "/// Loads integer values from a 128-bit aligned memory location to a\n" |
| 44449 | "/// 128-bit integer vector.\n" |
| 44450 | "///\n" |
| 44451 | "/// \\headerfile <x86intrin.h>\n" |
| 44452 | "///\n" |
| 44453 | "/// This intrinsic corresponds to the <c> VMOVNTDQA / MOVNTDQA </c> instruction.\n" |
| 44454 | "///\n" |
| 44455 | "/// \\param __V\n" |
| 44456 | "/// A pointer to a 128-bit aligned memory location that contains the integer\n" |
| 44457 | "/// values.\n" |
| 44458 | "/// \\returns A 128-bit integer vector containing the data stored at the\n" |
| 44459 | "/// specified memory location.\n" |
| 44460 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 44461 | "_mm_stream_load_si128 (__m128i const *__V)\n" |
| 44462 | "{\n" |
| 44463 | " return (__m128i) __builtin_nontemporal_load ((const __v2di *) __V);\n" |
| 44464 | "}\n" |
| 44465 | "\n" |
| 44466 | "/* SSE4 Packed Integer Min/Max Instructions. */\n" |
| 44467 | "/// Compares the corresponding elements of two 128-bit vectors of\n" |
| 44468 | "/// [16 x i8] and returns a 128-bit vector of [16 x i8] containing the lesser\n" |
| 44469 | "/// of the two values.\n" |
| 44470 | "///\n" |
| 44471 | "/// \\headerfile <x86intrin.h>\n" |
| 44472 | "///\n" |
| 44473 | "/// This intrinsic corresponds to the <c> VPMINSB / PMINSB </c> instruction.\n" |
| 44474 | "///\n" |
| 44475 | "/// \\param __V1\n" |
| 44476 | "/// A 128-bit vector of [16 x i8].\n" |
| 44477 | "/// \\param __V2\n" |
| 44478 | "/// A 128-bit vector of [16 x i8]\n" |
| 44479 | "/// \\returns A 128-bit vector of [16 x i8] containing the lesser values.\n" |
| 44480 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 44481 | "_mm_min_epi8 (__m128i __V1, __m128i __V2)\n" |
| 44482 | "{\n" |
| 44483 | " return (__m128i) __builtin_ia32_pminsb128 ((__v16qi) __V1, (__v16qi) __V2);\n" |
| 44484 | "}\n" |
| 44485 | "\n" |
| 44486 | "/// Compares the corresponding elements of two 128-bit vectors of\n" |
| 44487 | "/// [16 x i8] and returns a 128-bit vector of [16 x i8] containing the\n" |
| 44488 | "/// greater value of the two.\n" |
| 44489 | "///\n" |
| 44490 | "/// \\headerfile <x86intrin.h>\n" |
| 44491 | "///\n" |
| 44492 | "/// This intrinsic corresponds to the <c> VPMAXSB / PMAXSB </c> instruction.\n" |
| 44493 | "///\n" |
| 44494 | "/// \\param __V1\n" |
| 44495 | "/// A 128-bit vector of [16 x i8].\n" |
| 44496 | "/// \\param __V2\n" |
| 44497 | "/// A 128-bit vector of [16 x i8].\n" |
| 44498 | "/// \\returns A 128-bit vector of [16 x i8] containing the greater values.\n" |
| 44499 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 44500 | "_mm_max_epi8 (__m128i __V1, __m128i __V2)\n" |
| 44501 | "{\n" |
| 44502 | " return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) __V1, (__v16qi) __V2);\n" |
| 44503 | "}\n" |
| 44504 | "\n" |
| 44505 | "/// Compares the corresponding elements of two 128-bit vectors of\n" |
| 44506 | "/// [8 x u16] and returns a 128-bit vector of [8 x u16] containing the lesser\n" |
| 44507 | "/// value of the two.\n" |
| 44508 | "///\n" |
| 44509 | "/// \\headerfile <x86intrin.h>\n" |
| 44510 | "///\n" |
| 44511 | "/// This intrinsic corresponds to the <c> VPMINUW / PMINUW </c> instruction.\n" |
| 44512 | "///\n" |
| 44513 | "/// \\param __V1\n" |
| 44514 | "/// A 128-bit vector of [8 x u16].\n" |
| 44515 | "/// \\param __V2\n" |
| 44516 | "/// A 128-bit vector of [8 x u16].\n" |
| 44517 | "/// \\returns A 128-bit vector of [8 x u16] containing the lesser values.\n" |
| 44518 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 44519 | "_mm_min_epu16 (__m128i __V1, __m128i __V2)\n" |
| 44520 | "{\n" |
| 44521 | " return (__m128i) __builtin_ia32_pminuw128 ((__v8hi) __V1, (__v8hi) __V2);\n" |
| 44522 | "}\n" |
| 44523 | "\n" |
| 44524 | "/// Compares the corresponding elements of two 128-bit vectors of\n" |
| 44525 | "/// [8 x u16] and returns a 128-bit vector of [8 x u16] containing the\n" |
| 44526 | "/// greater value of the two.\n" |
| 44527 | "///\n" |
| 44528 | "/// \\headerfile <x86intrin.h>\n" |
| 44529 | "///\n" |
| 44530 | "/// This intrinsic corresponds to the <c> VPMAXUW / PMAXUW </c> instruction.\n" |
| 44531 | "///\n" |
| 44532 | "/// \\param __V1\n" |
| 44533 | "/// A 128-bit vector of [8 x u16].\n" |
| 44534 | "/// \\param __V2\n" |
| 44535 | "/// A 128-bit vector of [8 x u16].\n" |
| 44536 | "/// \\returns A 128-bit vector of [8 x u16] containing the greater values.\n" |
| 44537 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 44538 | "_mm_max_epu16 (__m128i __V1, __m128i __V2)\n" |
| 44539 | "{\n" |
| 44540 | " return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi) __V1, (__v8hi) __V2);\n" |
| 44541 | "}\n" |
| 44542 | "\n" |
| 44543 | "/// Compares the corresponding elements of two 128-bit vectors of\n" |
| 44544 | "/// [4 x i32] and returns a 128-bit vector of [4 x i32] containing the lesser\n" |
| 44545 | "/// value of the two.\n" |
| 44546 | "///\n" |
| 44547 | "/// \\headerfile <x86intrin.h>\n" |
| 44548 | "///\n" |
| 44549 | "/// This intrinsic corresponds to the <c> VPMINSD / PMINSD </c> instruction.\n" |
| 44550 | "///\n" |
| 44551 | "/// \\param __V1\n" |
| 44552 | "/// A 128-bit vector of [4 x i32].\n" |
| 44553 | "/// \\param __V2\n" |
| 44554 | "/// A 128-bit vector of [4 x i32].\n" |
| 44555 | "/// \\returns A 128-bit vector of [4 x i32] containing the lesser values.\n" |
| 44556 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 44557 | "_mm_min_epi32 (__m128i __V1, __m128i __V2)\n" |
| 44558 | "{\n" |
| 44559 | " return (__m128i) __builtin_ia32_pminsd128 ((__v4si) __V1, (__v4si) __V2);\n" |
| 44560 | "}\n" |
| 44561 | "\n" |
| 44562 | "/// Compares the corresponding elements of two 128-bit vectors of\n" |
| 44563 | "/// [4 x i32] and returns a 128-bit vector of [4 x i32] containing the\n" |
| 44564 | "/// greater value of the two.\n" |
| 44565 | "///\n" |
| 44566 | "/// \\headerfile <x86intrin.h>\n" |
| 44567 | "///\n" |
| 44568 | "/// This intrinsic corresponds to the <c> VPMAXSD / PMAXSD </c> instruction.\n" |
| 44569 | "///\n" |
| 44570 | "/// \\param __V1\n" |
| 44571 | "/// A 128-bit vector of [4 x i32].\n" |
| 44572 | "/// \\param __V2\n" |
| 44573 | "/// A 128-bit vector of [4 x i32].\n" |
| 44574 | "/// \\returns A 128-bit vector of [4 x i32] containing the greater values.\n" |
| 44575 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 44576 | "_mm_max_epi32 (__m128i __V1, __m128i __V2)\n" |
| 44577 | "{\n" |
| 44578 | " return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) __V1, (__v4si) __V2);\n" |
| 44579 | "}\n" |
| 44580 | "\n" |
| 44581 | "/// Compares the corresponding elements of two 128-bit vectors of\n" |
| 44582 | "/// [4 x u32] and returns a 128-bit vector of [4 x u32] containing the lesser\n" |
| 44583 | "/// value of the two.\n" |
| 44584 | "///\n" |
| 44585 | "/// \\headerfile <x86intrin.h>\n" |
| 44586 | "///\n" |
| 44587 | "/// This intrinsic corresponds to the <c> VPMINUD / PMINUD </c> instruction.\n" |
| 44588 | "///\n" |
| 44589 | "/// \\param __V1\n" |
| 44590 | "/// A 128-bit vector of [4 x u32].\n" |
| 44591 | "/// \\param __V2\n" |
| 44592 | "/// A 128-bit vector of [4 x u32].\n" |
| 44593 | "/// \\returns A 128-bit vector of [4 x u32] containing the lesser values.\n" |
| 44594 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 44595 | "_mm_min_epu32 (__m128i __V1, __m128i __V2)\n" |
| 44596 | "{\n" |
| 44597 | " return (__m128i) __builtin_ia32_pminud128((__v4si) __V1, (__v4si) __V2);\n" |
| 44598 | "}\n" |
| 44599 | "\n" |
| 44600 | "/// Compares the corresponding elements of two 128-bit vectors of\n" |
| 44601 | "/// [4 x u32] and returns a 128-bit vector of [4 x u32] containing the\n" |
| 44602 | "/// greater value of the two.\n" |
| 44603 | "///\n" |
| 44604 | "/// \\headerfile <x86intrin.h>\n" |
| 44605 | "///\n" |
| 44606 | "/// This intrinsic corresponds to the <c> VPMAXUD / PMAXUD </c> instruction.\n" |
| 44607 | "///\n" |
| 44608 | "/// \\param __V1\n" |
| 44609 | "/// A 128-bit vector of [4 x u32].\n" |
| 44610 | "/// \\param __V2\n" |
| 44611 | "/// A 128-bit vector of [4 x u32].\n" |
| 44612 | "/// \\returns A 128-bit vector of [4 x u32] containing the greater values.\n" |
| 44613 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 44614 | "_mm_max_epu32 (__m128i __V1, __m128i __V2)\n" |
| 44615 | "{\n" |
| 44616 | " return (__m128i) __builtin_ia32_pmaxud128((__v4si) __V1, (__v4si) __V2);\n" |
| 44617 | "}\n" |
| 44618 | "\n" |
| 44619 | "/* SSE4 Insertion and Extraction from XMM Register Instructions. */\n" |
| 44620 | "/// Takes the first argument \\a X and inserts an element from the second\n" |
| 44621 | "/// argument \\a Y as selected by the third argument \\a N. That result then\n" |
| 44622 | "/// has elements zeroed out also as selected by the third argument \\a N. The\n" |
| 44623 | "/// resulting 128-bit vector of [4 x float] is then returned.\n" |
| 44624 | "///\n" |
| 44625 | "/// \\headerfile <x86intrin.h>\n" |
| 44626 | "///\n" |
| 44627 | "/// \\code\n" |
| 44628 | "/// __m128 _mm_insert_ps(__m128 X, __m128 Y, const int N);\n" |
| 44629 | "/// \\endcode\n" |
| 44630 | "///\n" |
| 44631 | "/// This intrinsic corresponds to the <c> VINSERTPS </c> instruction.\n" |
| 44632 | "///\n" |
| 44633 | "/// \\param X\n" |
| 44634 | "/// A 128-bit vector source operand of [4 x float]. With the exception of\n" |
| 44635 | "/// those bits in the result copied from parameter \\a Y and zeroed by bits\n" |
| 44636 | "/// [3:0] of \\a N, all bits from this parameter are copied to the result.\n" |
| 44637 | "/// \\param Y\n" |
| 44638 | "/// A 128-bit vector source operand of [4 x float]. One single-precision\n" |
| 44639 | "/// floating-point element from this source, as determined by the immediate\n" |
| 44640 | "/// parameter, is copied to the result.\n" |
| 44641 | "/// \\param N\n" |
| 44642 | "/// Specifies which bits from operand \\a Y will be copied, which bits in the\n" |
| 44643 | "/// result they will be be copied to, and which bits in the result will be\n" |
| 44644 | "/// cleared. The following assignments are made: \\n\n" |
| 44645 | "/// Bits [7:6] specify the bits to copy from operand \\a Y: \\n\n" |
| 44646 | "/// 00: Selects bits [31:0] from operand \\a Y. \\n\n" |
| 44647 | "/// 01: Selects bits [63:32] from operand \\a Y. \\n\n" |
| 44648 | "/// 10: Selects bits [95:64] from operand \\a Y. \\n\n" |
| 44649 | "/// 11: Selects bits [127:96] from operand \\a Y. \\n\n" |
| 44650 | "/// Bits [5:4] specify the bits in the result to which the selected bits\n" |
| 44651 | "/// from operand \\a Y are copied: \\n\n" |
| 44652 | "/// 00: Copies the selected bits from \\a Y to result bits [31:0]. \\n\n" |
| 44653 | "/// 01: Copies the selected bits from \\a Y to result bits [63:32]. \\n\n" |
| 44654 | "/// 10: Copies the selected bits from \\a Y to result bits [95:64]. \\n\n" |
| 44655 | "/// 11: Copies the selected bits from \\a Y to result bits [127:96]. \\n\n" |
| 44656 | "/// Bits[3:0]: If any of these bits are set, the corresponding result\n" |
| 44657 | "/// element is cleared.\n" |
| 44658 | "/// \\returns A 128-bit vector of [4 x float] containing the copied\n" |
| 44659 | "/// single-precision floating point elements from the operands.\n" |
| 44660 | "#define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N))\n" |
| 44661 | "\n" |
| 44662 | "/// Extracts a 32-bit integer from a 128-bit vector of [4 x float] and\n" |
| 44663 | "/// returns it, using the immediate value parameter \\a N as a selector.\n" |
| 44664 | "///\n" |
| 44665 | "/// \\headerfile <x86intrin.h>\n" |
| 44666 | "///\n" |
| 44667 | "/// \\code\n" |
| 44668 | "/// int _mm_extract_ps(__m128 X, const int N);\n" |
| 44669 | "/// \\endcode\n" |
| 44670 | "///\n" |
| 44671 | "/// This intrinsic corresponds to the <c> VEXTRACTPS / EXTRACTPS </c>\n" |
| 44672 | "/// instruction.\n" |
| 44673 | "///\n" |
| 44674 | "/// \\param X\n" |
| 44675 | "/// A 128-bit vector of [4 x float].\n" |
| 44676 | "/// \\param N\n" |
| 44677 | "/// An immediate value. Bits [1:0] determines which bits from the argument\n" |
| 44678 | "/// \\a X are extracted and returned: \\n\n" |
| 44679 | "/// 00: Bits [31:0] of parameter \\a X are returned. \\n\n" |
| 44680 | "/// 01: Bits [63:32] of parameter \\a X are returned. \\n\n" |
| 44681 | "/// 10: Bits [95:64] of parameter \\a X are returned. \\n\n" |
| 44682 | "/// 11: Bits [127:96] of parameter \\a X are returned.\n" |
| 44683 | "/// \\returns A 32-bit integer containing the extracted 32 bits of float data.\n" |
| 44684 | "#define _mm_extract_ps(X, N) (__extension__ \\\n" |
| 44685 | " ({ union { int __i; float __f; } __t; \\\n" |
| 44686 | " __t.__f = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); \\\n" |
| 44687 | " __t.__i;}))\n" |
| 44688 | "\n" |
| 44689 | "/* Miscellaneous insert and extract macros. */\n" |
| 44690 | "/* Extract a single-precision float from X at index N into D. */\n" |
| 44691 | "#define _MM_EXTRACT_FLOAT(D, X, N) \\\n" |
| 44692 | " { (D) = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); }\n" |
| 44693 | "\n" |
| 44694 | "/* Or together 2 sets of indexes (X and Y) with the zeroing bits (Z) to create\n" |
| 44695 | " an index suitable for _mm_insert_ps. */\n" |
| 44696 | "#define _MM_MK_INSERTPS_NDX(X, Y, Z) (((X) << 6) | ((Y) << 4) | (Z))\n" |
| 44697 | "\n" |
| 44698 | "/* Extract a float from X at index N into the first index of the return. */\n" |
| 44699 | "#define _MM_PICK_OUT_PS(X, N) _mm_insert_ps (_mm_setzero_ps(), (X), \\\n" |
| 44700 | " _MM_MK_INSERTPS_NDX((N), 0, 0x0e))\n" |
| 44701 | "\n" |
| 44702 | "/* Insert int into packed integer array at index. */\n" |
| 44703 | "/// Constructs a 128-bit vector of [16 x i8] by first making a copy of\n" |
| 44704 | "/// the 128-bit integer vector parameter, and then inserting the lower 8 bits\n" |
| 44705 | "/// of an integer parameter \\a I into an offset specified by the immediate\n" |
| 44706 | "/// value parameter \\a N.\n" |
| 44707 | "///\n" |
| 44708 | "/// \\headerfile <x86intrin.h>\n" |
| 44709 | "///\n" |
| 44710 | "/// \\code\n" |
| 44711 | "/// __m128i _mm_insert_epi8(__m128i X, int I, const int N);\n" |
| 44712 | "/// \\endcode\n" |
| 44713 | "///\n" |
| 44714 | "/// This intrinsic corresponds to the <c> VPINSRB / PINSRB </c> instruction.\n" |
| 44715 | "///\n" |
| 44716 | "/// \\param X\n" |
| 44717 | "/// A 128-bit integer vector of [16 x i8]. This vector is copied to the\n" |
| 44718 | "/// result and then one of the sixteen elements in the result vector is\n" |
| 44719 | "/// replaced by the lower 8 bits of \\a I.\n" |
| 44720 | "/// \\param I\n" |
| 44721 | "/// An integer. The lower 8 bits of this operand are written to the result\n" |
| 44722 | "/// beginning at the offset specified by \\a N.\n" |
| 44723 | "/// \\param N\n" |
| 44724 | "/// An immediate value. Bits [3:0] specify the bit offset in the result at\n" |
| 44725 | "/// which the lower 8 bits of \\a I are written. \\n\n" |
| 44726 | "/// 0000: Bits [7:0] of the result are used for insertion. \\n\n" |
| 44727 | "/// 0001: Bits [15:8] of the result are used for insertion. \\n\n" |
| 44728 | "/// 0010: Bits [23:16] of the result are used for insertion. \\n\n" |
| 44729 | "/// 0011: Bits [31:24] of the result are used for insertion. \\n\n" |
| 44730 | "/// 0100: Bits [39:32] of the result are used for insertion. \\n\n" |
| 44731 | "/// 0101: Bits [47:40] of the result are used for insertion. \\n\n" |
| 44732 | "/// 0110: Bits [55:48] of the result are used for insertion. \\n\n" |
| 44733 | "/// 0111: Bits [63:56] of the result are used for insertion. \\n\n" |
| 44734 | "/// 1000: Bits [71:64] of the result are used for insertion. \\n\n" |
| 44735 | "/// 1001: Bits [79:72] of the result are used for insertion. \\n\n" |
| 44736 | "/// 1010: Bits [87:80] of the result are used for insertion. \\n\n" |
| 44737 | "/// 1011: Bits [95:88] of the result are used for insertion. \\n\n" |
| 44738 | "/// 1100: Bits [103:96] of the result are used for insertion. \\n\n" |
| 44739 | "/// 1101: Bits [111:104] of the result are used for insertion. \\n\n" |
| 44740 | "/// 1110: Bits [119:112] of the result are used for insertion. \\n\n" |
| 44741 | "/// 1111: Bits [127:120] of the result are used for insertion.\n" |
| 44742 | "/// \\returns A 128-bit integer vector containing the constructed values.\n" |
| 44743 | "#define _mm_insert_epi8(X, I, N) \\\n" |
| 44744 | " (__m128i)__builtin_ia32_vec_set_v16qi((__v16qi)(__m128i)(X), \\\n" |
| 44745 | " (int)(I), (int)(N))\n" |
| 44746 | "\n" |
| 44747 | "/// Constructs a 128-bit vector of [4 x i32] by first making a copy of\n" |
| 44748 | "/// the 128-bit integer vector parameter, and then inserting the 32-bit\n" |
| 44749 | "/// integer parameter \\a I at the offset specified by the immediate value\n" |
| 44750 | "/// parameter \\a N.\n" |
| 44751 | "///\n" |
| 44752 | "/// \\headerfile <x86intrin.h>\n" |
| 44753 | "///\n" |
| 44754 | "/// \\code\n" |
| 44755 | "/// __m128i _mm_insert_epi32(__m128i X, int I, const int N);\n" |
| 44756 | "/// \\endcode\n" |
| 44757 | "///\n" |
| 44758 | "/// This intrinsic corresponds to the <c> VPINSRD / PINSRD </c> instruction.\n" |
| 44759 | "///\n" |
| 44760 | "/// \\param X\n" |
| 44761 | "/// A 128-bit integer vector of [4 x i32]. This vector is copied to the\n" |
| 44762 | "/// result and then one of the four elements in the result vector is\n" |
| 44763 | "/// replaced by \\a I.\n" |
| 44764 | "/// \\param I\n" |
| 44765 | "/// A 32-bit integer that is written to the result beginning at the offset\n" |
| 44766 | "/// specified by \\a N.\n" |
| 44767 | "/// \\param N\n" |
| 44768 | "/// An immediate value. Bits [1:0] specify the bit offset in the result at\n" |
| 44769 | "/// which the integer \\a I is written. \\n\n" |
| 44770 | "/// 00: Bits [31:0] of the result are used for insertion. \\n\n" |
| 44771 | "/// 01: Bits [63:32] of the result are used for insertion. \\n\n" |
| 44772 | "/// 10: Bits [95:64] of the result are used for insertion. \\n\n" |
| 44773 | "/// 11: Bits [127:96] of the result are used for insertion.\n" |
| 44774 | "/// \\returns A 128-bit integer vector containing the constructed values.\n" |
| 44775 | "#define _mm_insert_epi32(X, I, N) \\\n" |
| 44776 | " (__m128i)__builtin_ia32_vec_set_v4si((__v4si)(__m128i)(X), \\\n" |
| 44777 | " (int)(I), (int)(N))\n" |
| 44778 | "\n" |
| 44779 | "#ifdef __x86_64__\n" |
| 44780 | "/// Constructs a 128-bit vector of [2 x i64] by first making a copy of\n" |
| 44781 | "/// the 128-bit integer vector parameter, and then inserting the 64-bit\n" |
| 44782 | "/// integer parameter \\a I, using the immediate value parameter \\a N as an\n" |
| 44783 | "/// insertion location selector.\n" |
| 44784 | "///\n" |
| 44785 | "/// \\headerfile <x86intrin.h>\n" |
| 44786 | "///\n" |
| 44787 | "/// \\code\n" |
| 44788 | "/// __m128i _mm_insert_epi64(__m128i X, long long I, const int N);\n" |
| 44789 | "/// \\endcode\n" |
| 44790 | "///\n" |
| 44791 | "/// This intrinsic corresponds to the <c> VPINSRQ / PINSRQ </c> instruction.\n" |
| 44792 | "///\n" |
| 44793 | "/// \\param X\n" |
| 44794 | "/// A 128-bit integer vector of [2 x i64]. This vector is copied to the\n" |
| 44795 | "/// result and then one of the two elements in the result vector is replaced\n" |
| 44796 | "/// by \\a I.\n" |
| 44797 | "/// \\param I\n" |
| 44798 | "/// A 64-bit integer that is written to the result beginning at the offset\n" |
| 44799 | "/// specified by \\a N.\n" |
| 44800 | "/// \\param N\n" |
| 44801 | "/// An immediate value. Bit [0] specifies the bit offset in the result at\n" |
| 44802 | "/// which the integer \\a I is written. \\n\n" |
| 44803 | "/// 0: Bits [63:0] of the result are used for insertion. \\n\n" |
| 44804 | "/// 1: Bits [127:64] of the result are used for insertion. \\n\n" |
| 44805 | "/// \\returns A 128-bit integer vector containing the constructed values.\n" |
| 44806 | "#define _mm_insert_epi64(X, I, N) \\\n" |
| 44807 | " (__m128i)__builtin_ia32_vec_set_v2di((__v2di)(__m128i)(X), \\\n" |
| 44808 | " (long long)(I), (int)(N))\n" |
| 44809 | "#endif /* __x86_64__ */\n" |
| 44810 | "\n" |
| 44811 | "/* Extract int from packed integer array at index. This returns the element\n" |
| 44812 | " * as a zero extended value, so it is unsigned.\n" |
| 44813 | " */\n" |
| 44814 | "/// Extracts an 8-bit element from the 128-bit integer vector of\n" |
| 44815 | "/// [16 x i8], using the immediate value parameter \\a N as a selector.\n" |
| 44816 | "///\n" |
| 44817 | "/// \\headerfile <x86intrin.h>\n" |
| 44818 | "///\n" |
| 44819 | "/// \\code\n" |
| 44820 | "/// int _mm_extract_epi8(__m128i X, const int N);\n" |
| 44821 | "/// \\endcode\n" |
| 44822 | "///\n" |
| 44823 | "/// This intrinsic corresponds to the <c> VPEXTRB / PEXTRB </c> instruction.\n" |
| 44824 | "///\n" |
| 44825 | "/// \\param X\n" |
| 44826 | "/// A 128-bit integer vector.\n" |
| 44827 | "/// \\param N\n" |
| 44828 | "/// An immediate value. Bits [3:0] specify which 8-bit vector element from\n" |
| 44829 | "/// the argument \\a X to extract and copy to the result. \\n\n" |
| 44830 | "/// 0000: Bits [7:0] of parameter \\a X are extracted. \\n\n" |
| 44831 | "/// 0001: Bits [15:8] of the parameter \\a X are extracted. \\n\n" |
| 44832 | "/// 0010: Bits [23:16] of the parameter \\a X are extracted. \\n\n" |
| 44833 | "/// 0011: Bits [31:24] of the parameter \\a X are extracted. \\n\n" |
| 44834 | "/// 0100: Bits [39:32] of the parameter \\a X are extracted. \\n\n" |
| 44835 | "/// 0101: Bits [47:40] of the parameter \\a X are extracted. \\n\n" |
| 44836 | "/// 0110: Bits [55:48] of the parameter \\a X are extracted. \\n\n" |
| 44837 | "/// 0111: Bits [63:56] of the parameter \\a X are extracted. \\n\n" |
| 44838 | "/// 1000: Bits [71:64] of the parameter \\a X are extracted. \\n\n" |
| 44839 | "/// 1001: Bits [79:72] of the parameter \\a X are extracted. \\n\n" |
| 44840 | "/// 1010: Bits [87:80] of the parameter \\a X are extracted. \\n\n" |
| 44841 | "/// 1011: Bits [95:88] of the parameter \\a X are extracted. \\n\n" |
| 44842 | "/// 1100: Bits [103:96] of the parameter \\a X are extracted. \\n\n" |
| 44843 | "/// 1101: Bits [111:104] of the parameter \\a X are extracted. \\n\n" |
| 44844 | "/// 1110: Bits [119:112] of the parameter \\a X are extracted. \\n\n" |
| 44845 | "/// 1111: Bits [127:120] of the parameter \\a X are extracted.\n" |
| 44846 | "/// \\returns An unsigned integer, whose lower 8 bits are selected from the\n" |
| 44847 | "/// 128-bit integer vector parameter and the remaining bits are assigned\n" |
| 44848 | "/// zeros.\n" |
| 44849 | "#define _mm_extract_epi8(X, N) \\\n" |
| 44850 | " (int)(unsigned char)__builtin_ia32_vec_ext_v16qi((__v16qi)(__m128i)(X), \\\n" |
| 44851 | " (int)(N))\n" |
| 44852 | "\n" |
| 44853 | "/// Extracts a 32-bit element from the 128-bit integer vector of\n" |
| 44854 | "/// [4 x i32], using the immediate value parameter \\a N as a selector.\n" |
| 44855 | "///\n" |
| 44856 | "/// \\headerfile <x86intrin.h>\n" |
| 44857 | "///\n" |
| 44858 | "/// \\code\n" |
| 44859 | "/// int _mm_extract_epi32(__m128i X, const int N);\n" |
| 44860 | "/// \\endcode\n" |
| 44861 | "///\n" |
| 44862 | "/// This intrinsic corresponds to the <c> VPEXTRD / PEXTRD </c> instruction.\n" |
| 44863 | "///\n" |
| 44864 | "/// \\param X\n" |
| 44865 | "/// A 128-bit integer vector.\n" |
| 44866 | "/// \\param N\n" |
| 44867 | "/// An immediate value. Bits [1:0] specify which 32-bit vector element from\n" |
| 44868 | "/// the argument \\a X to extract and copy to the result. \\n\n" |
| 44869 | "/// 00: Bits [31:0] of the parameter \\a X are extracted. \\n\n" |
| 44870 | "/// 01: Bits [63:32] of the parameter \\a X are extracted. \\n\n" |
| 44871 | "/// 10: Bits [95:64] of the parameter \\a X are extracted. \\n\n" |
| 44872 | "/// 11: Bits [127:96] of the parameter \\a X are exracted.\n" |
| 44873 | "/// \\returns An integer, whose lower 32 bits are selected from the 128-bit\n" |
| 44874 | "/// integer vector parameter and the remaining bits are assigned zeros.\n" |
| 44875 | "#define _mm_extract_epi32(X, N) \\\n" |
| 44876 | " (int)__builtin_ia32_vec_ext_v4si((__v4si)(__m128i)(X), (int)(N))\n" |
| 44877 | "\n" |
| 44878 | "#ifdef __x86_64__\n" |
| 44879 | "/// Extracts a 64-bit element from the 128-bit integer vector of\n" |
| 44880 | "/// [2 x i64], using the immediate value parameter \\a N as a selector.\n" |
| 44881 | "///\n" |
| 44882 | "/// \\headerfile <x86intrin.h>\n" |
| 44883 | "///\n" |
| 44884 | "/// \\code\n" |
| 44885 | "/// long long _mm_extract_epi64(__m128i X, const int N);\n" |
| 44886 | "/// \\endcode\n" |
| 44887 | "///\n" |
| 44888 | "/// This intrinsic corresponds to the <c> VPEXTRQ / PEXTRQ </c> instruction.\n" |
| 44889 | "///\n" |
| 44890 | "/// \\param X\n" |
| 44891 | "/// A 128-bit integer vector.\n" |
| 44892 | "/// \\param N\n" |
| 44893 | "/// An immediate value. Bit [0] specifies which 64-bit vector element from\n" |
| 44894 | "/// the argument \\a X to return. \\n\n" |
| 44895 | "/// 0: Bits [63:0] are returned. \\n\n" |
| 44896 | "/// 1: Bits [127:64] are returned. \\n\n" |
| 44897 | "/// \\returns A 64-bit integer.\n" |
| 44898 | "#define _mm_extract_epi64(X, N) \\\n" |
| 44899 | " (long long)__builtin_ia32_vec_ext_v2di((__v2di)(__m128i)(X), (int)(N))\n" |
| 44900 | "#endif /* __x86_64 */\n" |
| 44901 | "\n" |
| 44902 | "/* SSE4 128-bit Packed Integer Comparisons. */\n" |
| 44903 | "/// Tests whether the specified bits in a 128-bit integer vector are all\n" |
| 44904 | "/// zeros.\n" |
| 44905 | "///\n" |
| 44906 | "/// \\headerfile <x86intrin.h>\n" |
| 44907 | "///\n" |
| 44908 | "/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.\n" |
| 44909 | "///\n" |
| 44910 | "/// \\param __M\n" |
| 44911 | "/// A 128-bit integer vector containing the bits to be tested.\n" |
| 44912 | "/// \\param __V\n" |
| 44913 | "/// A 128-bit integer vector selecting which bits to test in operand \\a __M.\n" |
| 44914 | "/// \\returns TRUE if the specified bits are all zeros; FALSE otherwise.\n" |
| 44915 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 44916 | "_mm_testz_si128(__m128i __M, __m128i __V)\n" |
| 44917 | "{\n" |
| 44918 | " return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V);\n" |
| 44919 | "}\n" |
| 44920 | "\n" |
| 44921 | "/// Tests whether the specified bits in a 128-bit integer vector are all\n" |
| 44922 | "/// ones.\n" |
| 44923 | "///\n" |
| 44924 | "/// \\headerfile <x86intrin.h>\n" |
| 44925 | "///\n" |
| 44926 | "/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.\n" |
| 44927 | "///\n" |
| 44928 | "/// \\param __M\n" |
| 44929 | "/// A 128-bit integer vector containing the bits to be tested.\n" |
| 44930 | "/// \\param __V\n" |
| 44931 | "/// A 128-bit integer vector selecting which bits to test in operand \\a __M.\n" |
| 44932 | "/// \\returns TRUE if the specified bits are all ones; FALSE otherwise.\n" |
| 44933 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 44934 | "_mm_testc_si128(__m128i __M, __m128i __V)\n" |
| 44935 | "{\n" |
| 44936 | " return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V);\n" |
| 44937 | "}\n" |
| 44938 | "\n" |
| 44939 | "/// Tests whether the specified bits in a 128-bit integer vector are\n" |
| 44940 | "/// neither all zeros nor all ones.\n" |
| 44941 | "///\n" |
| 44942 | "/// \\headerfile <x86intrin.h>\n" |
| 44943 | "///\n" |
| 44944 | "/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.\n" |
| 44945 | "///\n" |
| 44946 | "/// \\param __M\n" |
| 44947 | "/// A 128-bit integer vector containing the bits to be tested.\n" |
| 44948 | "/// \\param __V\n" |
| 44949 | "/// A 128-bit integer vector selecting which bits to test in operand \\a __M.\n" |
| 44950 | "/// \\returns TRUE if the specified bits are neither all zeros nor all ones;\n" |
| 44951 | "/// FALSE otherwise.\n" |
| 44952 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 44953 | "_mm_testnzc_si128(__m128i __M, __m128i __V)\n" |
| 44954 | "{\n" |
| 44955 | " return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V);\n" |
| 44956 | "}\n" |
| 44957 | "\n" |
| 44958 | "/// Tests whether the specified bits in a 128-bit integer vector are all\n" |
| 44959 | "/// ones.\n" |
| 44960 | "///\n" |
| 44961 | "/// \\headerfile <x86intrin.h>\n" |
| 44962 | "///\n" |
| 44963 | "/// \\code\n" |
| 44964 | "/// int _mm_test_all_ones(__m128i V);\n" |
| 44965 | "/// \\endcode\n" |
| 44966 | "///\n" |
| 44967 | "/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.\n" |
| 44968 | "///\n" |
| 44969 | "/// \\param V\n" |
| 44970 | "/// A 128-bit integer vector containing the bits to be tested.\n" |
| 44971 | "/// \\returns TRUE if the bits specified in the operand are all set to 1; FALSE\n" |
| 44972 | "/// otherwise.\n" |
| 44973 | "#define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V)))\n" |
| 44974 | "\n" |
| 44975 | "/// Tests whether the specified bits in a 128-bit integer vector are\n" |
| 44976 | "/// neither all zeros nor all ones.\n" |
| 44977 | "///\n" |
| 44978 | "/// \\headerfile <x86intrin.h>\n" |
| 44979 | "///\n" |
| 44980 | "/// \\code\n" |
| 44981 | "/// int _mm_test_mix_ones_zeros(__m128i M, __m128i V);\n" |
| 44982 | "/// \\endcode\n" |
| 44983 | "///\n" |
| 44984 | "/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.\n" |
| 44985 | "///\n" |
| 44986 | "/// \\param M\n" |
| 44987 | "/// A 128-bit integer vector containing the bits to be tested.\n" |
| 44988 | "/// \\param V\n" |
| 44989 | "/// A 128-bit integer vector selecting which bits to test in operand \\a M.\n" |
| 44990 | "/// \\returns TRUE if the specified bits are neither all zeros nor all ones;\n" |
| 44991 | "/// FALSE otherwise.\n" |
| 44992 | "#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V))\n" |
| 44993 | "\n" |
| 44994 | "/// Tests whether the specified bits in a 128-bit integer vector are all\n" |
| 44995 | "/// zeros.\n" |
| 44996 | "///\n" |
| 44997 | "/// \\headerfile <x86intrin.h>\n" |
| 44998 | "///\n" |
| 44999 | "/// \\code\n" |
| 45000 | "/// int _mm_test_all_zeros(__m128i M, __m128i V);\n" |
| 45001 | "/// \\endcode\n" |
| 45002 | "///\n" |
| 45003 | "/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.\n" |
| 45004 | "///\n" |
| 45005 | "/// \\param M\n" |
| 45006 | "/// A 128-bit integer vector containing the bits to be tested.\n" |
| 45007 | "/// \\param V\n" |
| 45008 | "/// A 128-bit integer vector selecting which bits to test in operand \\a M.\n" |
| 45009 | "/// \\returns TRUE if the specified bits are all zeros; FALSE otherwise.\n" |
| 45010 | "#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V))\n" |
| 45011 | "\n" |
| 45012 | "/* SSE4 64-bit Packed Integer Comparisons. */\n" |
| 45013 | "/// Compares each of the corresponding 64-bit values of the 128-bit\n" |
| 45014 | "/// integer vectors for equality.\n" |
| 45015 | "///\n" |
| 45016 | "/// \\headerfile <x86intrin.h>\n" |
| 45017 | "///\n" |
| 45018 | "/// This intrinsic corresponds to the <c> VPCMPEQQ / PCMPEQQ </c> instruction.\n" |
| 45019 | "///\n" |
| 45020 | "/// \\param __V1\n" |
| 45021 | "/// A 128-bit integer vector.\n" |
| 45022 | "/// \\param __V2\n" |
| 45023 | "/// A 128-bit integer vector.\n" |
| 45024 | "/// \\returns A 128-bit integer vector containing the comparison results.\n" |
| 45025 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 45026 | "_mm_cmpeq_epi64(__m128i __V1, __m128i __V2)\n" |
| 45027 | "{\n" |
| 45028 | " return (__m128i)((__v2di)__V1 == (__v2di)__V2);\n" |
| 45029 | "}\n" |
| 45030 | "\n" |
| 45031 | "/* SSE4 Packed Integer Sign-Extension. */\n" |
| 45032 | "/// Sign-extends each of the lower eight 8-bit integer elements of a\n" |
| 45033 | "/// 128-bit vector of [16 x i8] to 16-bit values and returns them in a\n" |
| 45034 | "/// 128-bit vector of [8 x i16]. The upper eight elements of the input vector\n" |
| 45035 | "/// are unused.\n" |
| 45036 | "///\n" |
| 45037 | "/// \\headerfile <x86intrin.h>\n" |
| 45038 | "///\n" |
| 45039 | "/// This intrinsic corresponds to the <c> VPMOVSXBW / PMOVSXBW </c> instruction.\n" |
| 45040 | "///\n" |
| 45041 | "/// \\param __V\n" |
| 45042 | "/// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are sign-\n" |
| 45043 | "/// extended to 16-bit values.\n" |
| 45044 | "/// \\returns A 128-bit vector of [8 x i16] containing the sign-extended values.\n" |
| 45045 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 45046 | "_mm_cvtepi8_epi16(__m128i __V)\n" |
| 45047 | "{\n" |
| 45048 | " /* This function always performs a signed extension, but __v16qi is a char\n" |
| 45049 | " which may be signed or unsigned, so use __v16qs. */\n" |
| 45050 | " return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi);\n" |
| 45051 | "}\n" |
| 45052 | "\n" |
| 45053 | "/// Sign-extends each of the lower four 8-bit integer elements of a\n" |
| 45054 | "/// 128-bit vector of [16 x i8] to 32-bit values and returns them in a\n" |
| 45055 | "/// 128-bit vector of [4 x i32]. The upper twelve elements of the input\n" |
| 45056 | "/// vector are unused.\n" |
| 45057 | "///\n" |
| 45058 | "/// \\headerfile <x86intrin.h>\n" |
| 45059 | "///\n" |
| 45060 | "/// This intrinsic corresponds to the <c> VPMOVSXBD / PMOVSXBD </c> instruction.\n" |
| 45061 | "///\n" |
| 45062 | "/// \\param __V\n" |
| 45063 | "/// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are\n" |
| 45064 | "/// sign-extended to 32-bit values.\n" |
| 45065 | "/// \\returns A 128-bit vector of [4 x i32] containing the sign-extended values.\n" |
| 45066 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 45067 | "_mm_cvtepi8_epi32(__m128i __V)\n" |
| 45068 | "{\n" |
| 45069 | " /* This function always performs a signed extension, but __v16qi is a char\n" |
| 45070 | " which may be signed or unsigned, so use __v16qs. */\n" |
| 45071 | " return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4si);\n" |
| 45072 | "}\n" |
| 45073 | "\n" |
| 45074 | "/// Sign-extends each of the lower two 8-bit integer elements of a\n" |
| 45075 | "/// 128-bit integer vector of [16 x i8] to 64-bit values and returns them in\n" |
| 45076 | "/// a 128-bit vector of [2 x i64]. The upper fourteen elements of the input\n" |
| 45077 | "/// vector are unused.\n" |
| 45078 | "///\n" |
| 45079 | "/// \\headerfile <x86intrin.h>\n" |
| 45080 | "///\n" |
| 45081 | "/// This intrinsic corresponds to the <c> VPMOVSXBQ / PMOVSXBQ </c> instruction.\n" |
| 45082 | "///\n" |
| 45083 | "/// \\param __V\n" |
| 45084 | "/// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are\n" |
| 45085 | "/// sign-extended to 64-bit values.\n" |
| 45086 | "/// \\returns A 128-bit vector of [2 x i64] containing the sign-extended values.\n" |
| 45087 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 45088 | "_mm_cvtepi8_epi64(__m128i __V)\n" |
| 45089 | "{\n" |
| 45090 | " /* This function always performs a signed extension, but __v16qi is a char\n" |
| 45091 | " which may be signed or unsigned, so use __v16qs. */\n" |
| 45092 | " return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1), __v2di);\n" |
| 45093 | "}\n" |
| 45094 | "\n" |
| 45095 | "/// Sign-extends each of the lower four 16-bit integer elements of a\n" |
| 45096 | "/// 128-bit integer vector of [8 x i16] to 32-bit values and returns them in\n" |
| 45097 | "/// a 128-bit vector of [4 x i32]. The upper four elements of the input\n" |
| 45098 | "/// vector are unused.\n" |
| 45099 | "///\n" |
| 45100 | "/// \\headerfile <x86intrin.h>\n" |
| 45101 | "///\n" |
| 45102 | "/// This intrinsic corresponds to the <c> VPMOVSXWD / PMOVSXWD </c> instruction.\n" |
| 45103 | "///\n" |
| 45104 | "/// \\param __V\n" |
| 45105 | "/// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are\n" |
| 45106 | "/// sign-extended to 32-bit values.\n" |
| 45107 | "/// \\returns A 128-bit vector of [4 x i32] containing the sign-extended values.\n" |
| 45108 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 45109 | "_mm_cvtepi16_epi32(__m128i __V)\n" |
| 45110 | "{\n" |
| 45111 | " return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4si);\n" |
| 45112 | "}\n" |
| 45113 | "\n" |
| 45114 | "/// Sign-extends each of the lower two 16-bit integer elements of a\n" |
| 45115 | "/// 128-bit integer vector of [8 x i16] to 64-bit values and returns them in\n" |
| 45116 | "/// a 128-bit vector of [2 x i64]. The upper six elements of the input\n" |
| 45117 | "/// vector are unused.\n" |
| 45118 | "///\n" |
| 45119 | "/// \\headerfile <x86intrin.h>\n" |
| 45120 | "///\n" |
| 45121 | "/// This intrinsic corresponds to the <c> VPMOVSXWQ / PMOVSXWQ </c> instruction.\n" |
| 45122 | "///\n" |
| 45123 | "/// \\param __V\n" |
| 45124 | "/// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are\n" |
| 45125 | "/// sign-extended to 64-bit values.\n" |
| 45126 | "/// \\returns A 128-bit vector of [2 x i64] containing the sign-extended values.\n" |
| 45127 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 45128 | "_mm_cvtepi16_epi64(__m128i __V)\n" |
| 45129 | "{\n" |
| 45130 | " return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1), __v2di);\n" |
| 45131 | "}\n" |
| 45132 | "\n" |
| 45133 | "/// Sign-extends each of the lower two 32-bit integer elements of a\n" |
| 45134 | "/// 128-bit integer vector of [4 x i32] to 64-bit values and returns them in\n" |
| 45135 | "/// a 128-bit vector of [2 x i64]. The upper two elements of the input vector\n" |
| 45136 | "/// are unused.\n" |
| 45137 | "///\n" |
| 45138 | "/// \\headerfile <x86intrin.h>\n" |
| 45139 | "///\n" |
| 45140 | "/// This intrinsic corresponds to the <c> VPMOVSXDQ / PMOVSXDQ </c> instruction.\n" |
| 45141 | "///\n" |
| 45142 | "/// \\param __V\n" |
| 45143 | "/// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are\n" |
| 45144 | "/// sign-extended to 64-bit values.\n" |
| 45145 | "/// \\returns A 128-bit vector of [2 x i64] containing the sign-extended values.\n" |
| 45146 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 45147 | "_mm_cvtepi32_epi64(__m128i __V)\n" |
| 45148 | "{\n" |
| 45149 | " return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v4si)__V, (__v4si)__V, 0, 1), __v2di);\n" |
| 45150 | "}\n" |
| 45151 | "\n" |
| 45152 | "/* SSE4 Packed Integer Zero-Extension. */\n" |
| 45153 | "/// Zero-extends each of the lower eight 8-bit integer elements of a\n" |
| 45154 | "/// 128-bit vector of [16 x i8] to 16-bit values and returns them in a\n" |
| 45155 | "/// 128-bit vector of [8 x i16]. The upper eight elements of the input vector\n" |
| 45156 | "/// are unused.\n" |
| 45157 | "///\n" |
| 45158 | "/// \\headerfile <x86intrin.h>\n" |
| 45159 | "///\n" |
| 45160 | "/// This intrinsic corresponds to the <c> VPMOVZXBW / PMOVZXBW </c> instruction.\n" |
| 45161 | "///\n" |
| 45162 | "/// \\param __V\n" |
| 45163 | "/// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are\n" |
| 45164 | "/// zero-extended to 16-bit values.\n" |
| 45165 | "/// \\returns A 128-bit vector of [8 x i16] containing the zero-extended values.\n" |
| 45166 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 45167 | "_mm_cvtepu8_epi16(__m128i __V)\n" |
| 45168 | "{\n" |
| 45169 | " return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi);\n" |
| 45170 | "}\n" |
| 45171 | "\n" |
| 45172 | "/// Zero-extends each of the lower four 8-bit integer elements of a\n" |
| 45173 | "/// 128-bit vector of [16 x i8] to 32-bit values and returns them in a\n" |
| 45174 | "/// 128-bit vector of [4 x i32]. The upper twelve elements of the input\n" |
| 45175 | "/// vector are unused.\n" |
| 45176 | "///\n" |
| 45177 | "/// \\headerfile <x86intrin.h>\n" |
| 45178 | "///\n" |
| 45179 | "/// This intrinsic corresponds to the <c> VPMOVZXBD / PMOVZXBD </c> instruction.\n" |
| 45180 | "///\n" |
| 45181 | "/// \\param __V\n" |
| 45182 | "/// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are\n" |
| 45183 | "/// zero-extended to 32-bit values.\n" |
| 45184 | "/// \\returns A 128-bit vector of [4 x i32] containing the zero-extended values.\n" |
| 45185 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 45186 | "_mm_cvtepu8_epi32(__m128i __V)\n" |
| 45187 | "{\n" |
| 45188 | " return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4si);\n" |
| 45189 | "}\n" |
| 45190 | "\n" |
| 45191 | "/// Zero-extends each of the lower two 8-bit integer elements of a\n" |
| 45192 | "/// 128-bit integer vector of [16 x i8] to 64-bit values and returns them in\n" |
| 45193 | "/// a 128-bit vector of [2 x i64]. The upper fourteen elements of the input\n" |
| 45194 | "/// vector are unused.\n" |
| 45195 | "///\n" |
| 45196 | "/// \\headerfile <x86intrin.h>\n" |
| 45197 | "///\n" |
| 45198 | "/// This intrinsic corresponds to the <c> VPMOVZXBQ / PMOVZXBQ </c> instruction.\n" |
| 45199 | "///\n" |
| 45200 | "/// \\param __V\n" |
| 45201 | "/// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are\n" |
| 45202 | "/// zero-extended to 64-bit values.\n" |
| 45203 | "/// \\returns A 128-bit vector of [2 x i64] containing the zero-extended values.\n" |
| 45204 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 45205 | "_mm_cvtepu8_epi64(__m128i __V)\n" |
| 45206 | "{\n" |
| 45207 | " return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1), __v2di);\n" |
| 45208 | "}\n" |
| 45209 | "\n" |
| 45210 | "/// Zero-extends each of the lower four 16-bit integer elements of a\n" |
| 45211 | "/// 128-bit integer vector of [8 x i16] to 32-bit values and returns them in\n" |
| 45212 | "/// a 128-bit vector of [4 x i32]. The upper four elements of the input\n" |
| 45213 | "/// vector are unused.\n" |
| 45214 | "///\n" |
| 45215 | "/// \\headerfile <x86intrin.h>\n" |
| 45216 | "///\n" |
| 45217 | "/// This intrinsic corresponds to the <c> VPMOVZXWD / PMOVZXWD </c> instruction.\n" |
| 45218 | "///\n" |
| 45219 | "/// \\param __V\n" |
| 45220 | "/// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are\n" |
| 45221 | "/// zero-extended to 32-bit values.\n" |
| 45222 | "/// \\returns A 128-bit vector of [4 x i32] containing the zero-extended values.\n" |
| 45223 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 45224 | "_mm_cvtepu16_epi32(__m128i __V)\n" |
| 45225 | "{\n" |
| 45226 | " return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4si);\n" |
| 45227 | "}\n" |
| 45228 | "\n" |
| 45229 | "/// Zero-extends each of the lower two 16-bit integer elements of a\n" |
| 45230 | "/// 128-bit integer vector of [8 x i16] to 64-bit values and returns them in\n" |
| 45231 | "/// a 128-bit vector of [2 x i64]. The upper six elements of the input vector\n" |
| 45232 | "/// are unused.\n" |
| 45233 | "///\n" |
| 45234 | "/// \\headerfile <x86intrin.h>\n" |
| 45235 | "///\n" |
| 45236 | "/// This intrinsic corresponds to the <c> VPMOVZXWQ / PMOVZXWQ </c> instruction.\n" |
| 45237 | "///\n" |
| 45238 | "/// \\param __V\n" |
| 45239 | "/// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are\n" |
| 45240 | "/// zero-extended to 64-bit values.\n" |
| 45241 | "/// \\returns A 128-bit vector of [2 x i64] containing the zero-extended values.\n" |
| 45242 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 45243 | "_mm_cvtepu16_epi64(__m128i __V)\n" |
| 45244 | "{\n" |
| 45245 | " return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1), __v2di);\n" |
| 45246 | "}\n" |
| 45247 | "\n" |
| 45248 | "/// Zero-extends each of the lower two 32-bit integer elements of a\n" |
| 45249 | "/// 128-bit integer vector of [4 x i32] to 64-bit values and returns them in\n" |
| 45250 | "/// a 128-bit vector of [2 x i64]. The upper two elements of the input vector\n" |
| 45251 | "/// are unused.\n" |
| 45252 | "///\n" |
| 45253 | "/// \\headerfile <x86intrin.h>\n" |
| 45254 | "///\n" |
| 45255 | "/// This intrinsic corresponds to the <c> VPMOVZXDQ / PMOVZXDQ </c> instruction.\n" |
| 45256 | "///\n" |
| 45257 | "/// \\param __V\n" |
| 45258 | "/// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are\n" |
| 45259 | "/// zero-extended to 64-bit values.\n" |
| 45260 | "/// \\returns A 128-bit vector of [2 x i64] containing the zero-extended values.\n" |
| 45261 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 45262 | "_mm_cvtepu32_epi64(__m128i __V)\n" |
| 45263 | "{\n" |
| 45264 | " return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v4su)__V, (__v4su)__V, 0, 1), __v2di);\n" |
| 45265 | "}\n" |
| 45266 | "\n" |
| 45267 | "/* SSE4 Pack with Unsigned Saturation. */\n" |
| 45268 | "/// Converts 32-bit signed integers from both 128-bit integer vector\n" |
| 45269 | "/// operands into 16-bit unsigned integers, and returns the packed result.\n" |
| 45270 | "/// Values greater than 0xFFFF are saturated to 0xFFFF. Values less than\n" |
| 45271 | "/// 0x0000 are saturated to 0x0000.\n" |
| 45272 | "///\n" |
| 45273 | "/// \\headerfile <x86intrin.h>\n" |
| 45274 | "///\n" |
| 45275 | "/// This intrinsic corresponds to the <c> VPACKUSDW / PACKUSDW </c> instruction.\n" |
| 45276 | "///\n" |
| 45277 | "/// \\param __V1\n" |
| 45278 | "/// A 128-bit vector of [4 x i32]. Each 32-bit element is treated as a\n" |
| 45279 | "/// signed integer and is converted to a 16-bit unsigned integer with\n" |
| 45280 | "/// saturation. Values greater than 0xFFFF are saturated to 0xFFFF. Values\n" |
| 45281 | "/// less than 0x0000 are saturated to 0x0000. The converted [4 x i16] values\n" |
| 45282 | "/// are written to the lower 64 bits of the result.\n" |
| 45283 | "/// \\param __V2\n" |
| 45284 | "/// A 128-bit vector of [4 x i32]. Each 32-bit element is treated as a\n" |
| 45285 | "/// signed integer and is converted to a 16-bit unsigned integer with\n" |
| 45286 | "/// saturation. Values greater than 0xFFFF are saturated to 0xFFFF. Values\n" |
| 45287 | "/// less than 0x0000 are saturated to 0x0000. The converted [4 x i16] values\n" |
| 45288 | "/// are written to the higher 64 bits of the result.\n" |
| 45289 | "/// \\returns A 128-bit vector of [8 x i16] containing the converted values.\n" |
| 45290 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 45291 | "_mm_packus_epi32(__m128i __V1, __m128i __V2)\n" |
| 45292 | "{\n" |
| 45293 | " return (__m128i) __builtin_ia32_packusdw128((__v4si)__V1, (__v4si)__V2);\n" |
| 45294 | "}\n" |
| 45295 | "\n" |
| 45296 | "/* SSE4 Multiple Packed Sums of Absolute Difference. */\n" |
| 45297 | "/// Subtracts 8-bit unsigned integer values and computes the absolute\n" |
| 45298 | "/// values of the differences to the corresponding bits in the destination.\n" |
| 45299 | "/// Then sums of the absolute differences are returned according to the bit\n" |
| 45300 | "/// fields in the immediate operand.\n" |
| 45301 | "///\n" |
| 45302 | "/// \\headerfile <x86intrin.h>\n" |
| 45303 | "///\n" |
| 45304 | "/// \\code\n" |
| 45305 | "/// __m128i _mm_mpsadbw_epu8(__m128i X, __m128i Y, const int M);\n" |
| 45306 | "/// \\endcode\n" |
| 45307 | "///\n" |
| 45308 | "/// This intrinsic corresponds to the <c> VMPSADBW / MPSADBW </c> instruction.\n" |
| 45309 | "///\n" |
| 45310 | "/// \\param X\n" |
| 45311 | "/// A 128-bit vector of [16 x i8].\n" |
| 45312 | "/// \\param Y\n" |
| 45313 | "/// A 128-bit vector of [16 x i8].\n" |
| 45314 | "/// \\param M\n" |
| 45315 | "/// An 8-bit immediate operand specifying how the absolute differences are to\n" |
| 45316 | "/// be calculated, according to the following algorithm:\n" |
| 45317 | "/// \\code\n" |
| 45318 | "/// // M2 represents bit 2 of the immediate operand\n" |
| 45319 | "/// // M10 represents bits [1:0] of the immediate operand\n" |
| 45320 | "/// i = M2 * 4;\n" |
| 45321 | "/// j = M10 * 4;\n" |
| 45322 | "/// for (k = 0; k < 8; k = k + 1) {\n" |
| 45323 | "/// d0 = abs(X[i + k + 0] - Y[j + 0]);\n" |
| 45324 | "/// d1 = abs(X[i + k + 1] - Y[j + 1]);\n" |
| 45325 | "/// d2 = abs(X[i + k + 2] - Y[j + 2]);\n" |
| 45326 | "/// d3 = abs(X[i + k + 3] - Y[j + 3]);\n" |
| 45327 | "/// r[k] = d0 + d1 + d2 + d3;\n" |
| 45328 | "/// }\n" |
| 45329 | "/// \\endcode\n" |
| 45330 | "/// \\returns A 128-bit integer vector containing the sums of the sets of\n" |
| 45331 | "/// absolute differences between both operands.\n" |
| 45332 | "#define _mm_mpsadbw_epu8(X, Y, M) \\\n" |
| 45333 | " (__m128i) __builtin_ia32_mpsadbw128((__v16qi)(__m128i)(X), \\\n" |
| 45334 | " (__v16qi)(__m128i)(Y), (M))\n" |
| 45335 | "\n" |
| 45336 | "/// Finds the minimum unsigned 16-bit element in the input 128-bit\n" |
| 45337 | "/// vector of [8 x u16] and returns it and along with its index.\n" |
| 45338 | "///\n" |
| 45339 | "/// \\headerfile <x86intrin.h>\n" |
| 45340 | "///\n" |
| 45341 | "/// This intrinsic corresponds to the <c> VPHMINPOSUW / PHMINPOSUW </c>\n" |
| 45342 | "/// instruction.\n" |
| 45343 | "///\n" |
| 45344 | "/// \\param __V\n" |
| 45345 | "/// A 128-bit vector of [8 x u16].\n" |
| 45346 | "/// \\returns A 128-bit value where bits [15:0] contain the minimum value found\n" |
| 45347 | "/// in parameter \\a __V, bits [18:16] contain the index of the minimum value\n" |
| 45348 | "/// and the remaining bits are set to 0.\n" |
| 45349 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 45350 | "_mm_minpos_epu16(__m128i __V)\n" |
| 45351 | "{\n" |
| 45352 | " return (__m128i) __builtin_ia32_phminposuw128((__v8hi)__V);\n" |
| 45353 | "}\n" |
| 45354 | "\n" |
| 45355 | "/* Handle the sse4.2 definitions here. */\n" |
| 45356 | "\n" |
| 45357 | "/* These definitions are normally in nmmintrin.h, but gcc puts them in here\n" |
| 45358 | " so we'll do the same. */\n" |
| 45359 | "\n" |
| 45360 | "#undef __DEFAULT_FN_ATTRS\n" |
| 45361 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"sse4.2\")))\n" |
| 45362 | "\n" |
| 45363 | "/* These specify the type of data that we're comparing. */\n" |
| 45364 | "#define _SIDD_UBYTE_OPS 0x00\n" |
| 45365 | "#define _SIDD_UWORD_OPS 0x01\n" |
| 45366 | "#define _SIDD_SBYTE_OPS 0x02\n" |
| 45367 | "#define _SIDD_SWORD_OPS 0x03\n" |
| 45368 | "\n" |
| 45369 | "/* These specify the type of comparison operation. */\n" |
| 45370 | "#define _SIDD_CMP_EQUAL_ANY 0x00\n" |
| 45371 | "#define _SIDD_CMP_RANGES 0x04\n" |
| 45372 | "#define _SIDD_CMP_EQUAL_EACH 0x08\n" |
| 45373 | "#define _SIDD_CMP_EQUAL_ORDERED 0x0c\n" |
| 45374 | "\n" |
| 45375 | "/* These macros specify the polarity of the operation. */\n" |
| 45376 | "#define _SIDD_POSITIVE_POLARITY 0x00\n" |
| 45377 | "#define _SIDD_NEGATIVE_POLARITY 0x10\n" |
| 45378 | "#define _SIDD_MASKED_POSITIVE_POLARITY 0x20\n" |
| 45379 | "#define _SIDD_MASKED_NEGATIVE_POLARITY 0x30\n" |
| 45380 | "\n" |
| 45381 | "/* These macros are used in _mm_cmpXstri() to specify the return. */\n" |
| 45382 | "#define _SIDD_LEAST_SIGNIFICANT 0x00\n" |
| 45383 | "#define _SIDD_MOST_SIGNIFICANT 0x40\n" |
| 45384 | "\n" |
| 45385 | "/* These macros are used in _mm_cmpXstri() to specify the return. */\n" |
| 45386 | "#define _SIDD_BIT_MASK 0x00\n" |
| 45387 | "#define _SIDD_UNIT_MASK 0x40\n" |
| 45388 | "\n" |
| 45389 | "/* SSE4.2 Packed Comparison Intrinsics. */\n" |
| 45390 | "/// Uses the immediate operand \\a M to perform a comparison of string\n" |
| 45391 | "/// data with implicitly defined lengths that is contained in source operands\n" |
| 45392 | "/// \\a A and \\a B. Returns a 128-bit integer vector representing the result\n" |
| 45393 | "/// mask of the comparison.\n" |
| 45394 | "///\n" |
| 45395 | "/// \\headerfile <x86intrin.h>\n" |
| 45396 | "///\n" |
| 45397 | "/// \\code\n" |
| 45398 | "/// __m128i _mm_cmpistrm(__m128i A, __m128i B, const int M);\n" |
| 45399 | "/// \\endcode\n" |
| 45400 | "///\n" |
| 45401 | "/// This intrinsic corresponds to the <c> VPCMPISTRM / PCMPISTRM </c>\n" |
| 45402 | "/// instruction.\n" |
| 45403 | "///\n" |
| 45404 | "/// \\param A\n" |
| 45405 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
| 45406 | "/// compared.\n" |
| 45407 | "/// \\param B\n" |
| 45408 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
| 45409 | "/// compared.\n" |
| 45410 | "/// \\param M\n" |
| 45411 | "/// An 8-bit immediate operand specifying whether the characters are bytes or\n" |
| 45412 | "/// words, the type of comparison to perform, and the format of the return\n" |
| 45413 | "/// value. \\n\n" |
| 45414 | "/// Bits [1:0]: Determine source data format. \\n\n" |
| 45415 | "/// 00: 16 unsigned bytes \\n\n" |
| 45416 | "/// 01: 8 unsigned words \\n\n" |
| 45417 | "/// 10: 16 signed bytes \\n\n" |
| 45418 | "/// 11: 8 signed words \\n\n" |
| 45419 | "/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n" |
| 45420 | "/// 00: Subset: Each character in \\a B is compared for equality with all\n" |
| 45421 | "/// the characters in \\a A. \\n\n" |
| 45422 | "/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n" |
| 45423 | "/// basis is greater than or equal for even-indexed elements in \\a A,\n" |
| 45424 | "/// and less than or equal for odd-indexed elements in \\a A. \\n\n" |
| 45425 | "/// 10: Match: Compare each pair of corresponding characters in \\a A and\n" |
| 45426 | "/// \\a B for equality. \\n\n" |
| 45427 | "/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n" |
| 45428 | "/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n" |
| 45429 | "/// mask of the comparison results. \\n\n" |
| 45430 | "/// 00: No effect. \\n\n" |
| 45431 | "/// 01: Negate the bit mask. \\n\n" |
| 45432 | "/// 10: No effect. \\n\n" |
| 45433 | "/// 11: Negate the bit mask only for bits with an index less than or equal\n" |
| 45434 | "/// to the size of \\a A or \\a B. \\n\n" |
| 45435 | "/// Bit [6]: Determines whether the result is zero-extended or expanded to 16\n" |
| 45436 | "/// bytes. \\n\n" |
| 45437 | "/// 0: The result is zero-extended to 16 bytes. \\n\n" |
| 45438 | "/// 1: The result is expanded to 16 bytes (this expansion is performed by\n" |
| 45439 | "/// repeating each bit 8 or 16 times).\n" |
| 45440 | "/// \\returns Returns a 128-bit integer vector representing the result mask of\n" |
| 45441 | "/// the comparison.\n" |
| 45442 | "#define _mm_cmpistrm(A, B, M) \\\n" |
| 45443 | " (__m128i)__builtin_ia32_pcmpistrm128((__v16qi)(__m128i)(A), \\\n" |
| 45444 | " (__v16qi)(__m128i)(B), (int)(M))\n" |
| 45445 | "\n" |
| 45446 | "/// Uses the immediate operand \\a M to perform a comparison of string\n" |
| 45447 | "/// data with implicitly defined lengths that is contained in source operands\n" |
| 45448 | "/// \\a A and \\a B. Returns an integer representing the result index of the\n" |
| 45449 | "/// comparison.\n" |
| 45450 | "///\n" |
| 45451 | "/// \\headerfile <x86intrin.h>\n" |
| 45452 | "///\n" |
| 45453 | "/// \\code\n" |
| 45454 | "/// int _mm_cmpistri(__m128i A, __m128i B, const int M);\n" |
| 45455 | "/// \\endcode\n" |
| 45456 | "///\n" |
| 45457 | "/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>\n" |
| 45458 | "/// instruction.\n" |
| 45459 | "///\n" |
| 45460 | "/// \\param A\n" |
| 45461 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
| 45462 | "/// compared.\n" |
| 45463 | "/// \\param B\n" |
| 45464 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
| 45465 | "/// compared.\n" |
| 45466 | "/// \\param M\n" |
| 45467 | "/// An 8-bit immediate operand specifying whether the characters are bytes or\n" |
| 45468 | "/// words, the type of comparison to perform, and the format of the return\n" |
| 45469 | "/// value. \\n\n" |
| 45470 | "/// Bits [1:0]: Determine source data format. \\n\n" |
| 45471 | "/// 00: 16 unsigned bytes \\n\n" |
| 45472 | "/// 01: 8 unsigned words \\n\n" |
| 45473 | "/// 10: 16 signed bytes \\n\n" |
| 45474 | "/// 11: 8 signed words \\n\n" |
| 45475 | "/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n" |
| 45476 | "/// 00: Subset: Each character in \\a B is compared for equality with all\n" |
| 45477 | "/// the characters in \\a A. \\n\n" |
| 45478 | "/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n" |
| 45479 | "/// basis is greater than or equal for even-indexed elements in \\a A,\n" |
| 45480 | "/// and less than or equal for odd-indexed elements in \\a A. \\n\n" |
| 45481 | "/// 10: Match: Compare each pair of corresponding characters in \\a A and\n" |
| 45482 | "/// \\a B for equality. \\n\n" |
| 45483 | "/// 11: Substring: Search B for substring matches of \\a A. \\n\n" |
| 45484 | "/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n" |
| 45485 | "/// mask of the comparison results. \\n\n" |
| 45486 | "/// 00: No effect. \\n\n" |
| 45487 | "/// 01: Negate the bit mask. \\n\n" |
| 45488 | "/// 10: No effect. \\n\n" |
| 45489 | "/// 11: Negate the bit mask only for bits with an index less than or equal\n" |
| 45490 | "/// to the size of \\a A or \\a B. \\n\n" |
| 45491 | "/// Bit [6]: Determines whether the index of the lowest set bit or the\n" |
| 45492 | "/// highest set bit is returned. \\n\n" |
| 45493 | "/// 0: The index of the least significant set bit. \\n\n" |
| 45494 | "/// 1: The index of the most significant set bit. \\n\n" |
| 45495 | "/// \\returns Returns an integer representing the result index of the comparison.\n" |
| 45496 | "#define _mm_cmpistri(A, B, M) \\\n" |
| 45497 | " (int)__builtin_ia32_pcmpistri128((__v16qi)(__m128i)(A), \\\n" |
| 45498 | " (__v16qi)(__m128i)(B), (int)(M))\n" |
| 45499 | "\n" |
| 45500 | "/// Uses the immediate operand \\a M to perform a comparison of string\n" |
| 45501 | "/// data with explicitly defined lengths that is contained in source operands\n" |
| 45502 | "/// \\a A and \\a B. Returns a 128-bit integer vector representing the result\n" |
| 45503 | "/// mask of the comparison.\n" |
| 45504 | "///\n" |
| 45505 | "/// \\headerfile <x86intrin.h>\n" |
| 45506 | "///\n" |
| 45507 | "/// \\code\n" |
| 45508 | "/// __m128i _mm_cmpestrm(__m128i A, int LA, __m128i B, int LB, const int M);\n" |
| 45509 | "/// \\endcode\n" |
| 45510 | "///\n" |
| 45511 | "/// This intrinsic corresponds to the <c> VPCMPESTRM / PCMPESTRM </c>\n" |
| 45512 | "/// instruction.\n" |
| 45513 | "///\n" |
| 45514 | "/// \\param A\n" |
| 45515 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
| 45516 | "/// compared.\n" |
| 45517 | "/// \\param LA\n" |
| 45518 | "/// An integer that specifies the length of the string in \\a A.\n" |
| 45519 | "/// \\param B\n" |
| 45520 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
| 45521 | "/// compared.\n" |
| 45522 | "/// \\param LB\n" |
| 45523 | "/// An integer that specifies the length of the string in \\a B.\n" |
| 45524 | "/// \\param M\n" |
| 45525 | "/// An 8-bit immediate operand specifying whether the characters are bytes or\n" |
| 45526 | "/// words, the type of comparison to perform, and the format of the return\n" |
| 45527 | "/// value. \\n\n" |
| 45528 | "/// Bits [1:0]: Determine source data format. \\n\n" |
| 45529 | "/// 00: 16 unsigned bytes \\n\n" |
| 45530 | "/// 01: 8 unsigned words \\n\n" |
| 45531 | "/// 10: 16 signed bytes \\n\n" |
| 45532 | "/// 11: 8 signed words \\n\n" |
| 45533 | "/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n" |
| 45534 | "/// 00: Subset: Each character in \\a B is compared for equality with all\n" |
| 45535 | "/// the characters in \\a A. \\n\n" |
| 45536 | "/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n" |
| 45537 | "/// basis is greater than or equal for even-indexed elements in \\a A,\n" |
| 45538 | "/// and less than or equal for odd-indexed elements in \\a A. \\n\n" |
| 45539 | "/// 10: Match: Compare each pair of corresponding characters in \\a A and\n" |
| 45540 | "/// \\a B for equality. \\n\n" |
| 45541 | "/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n" |
| 45542 | "/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n" |
| 45543 | "/// mask of the comparison results. \\n\n" |
| 45544 | "/// 00: No effect. \\n\n" |
| 45545 | "/// 01: Negate the bit mask. \\n\n" |
| 45546 | "/// 10: No effect. \\n\n" |
| 45547 | "/// 11: Negate the bit mask only for bits with an index less than or equal\n" |
| 45548 | "/// to the size of \\a A or \\a B. \\n\n" |
| 45549 | "/// Bit [6]: Determines whether the result is zero-extended or expanded to 16\n" |
| 45550 | "/// bytes. \\n\n" |
| 45551 | "/// 0: The result is zero-extended to 16 bytes. \\n\n" |
| 45552 | "/// 1: The result is expanded to 16 bytes (this expansion is performed by\n" |
| 45553 | "/// repeating each bit 8 or 16 times). \\n\n" |
| 45554 | "/// \\returns Returns a 128-bit integer vector representing the result mask of\n" |
| 45555 | "/// the comparison.\n" |
| 45556 | "#define _mm_cmpestrm(A, LA, B, LB, M) \\\n" |
| 45557 | " (__m128i)__builtin_ia32_pcmpestrm128((__v16qi)(__m128i)(A), (int)(LA), \\\n" |
| 45558 | " (__v16qi)(__m128i)(B), (int)(LB), \\\n" |
| 45559 | " (int)(M))\n" |
| 45560 | "\n" |
| 45561 | "/// Uses the immediate operand \\a M to perform a comparison of string\n" |
| 45562 | "/// data with explicitly defined lengths that is contained in source operands\n" |
| 45563 | "/// \\a A and \\a B. Returns an integer representing the result index of the\n" |
| 45564 | "/// comparison.\n" |
| 45565 | "///\n" |
| 45566 | "/// \\headerfile <x86intrin.h>\n" |
| 45567 | "///\n" |
| 45568 | "/// \\code\n" |
| 45569 | "/// int _mm_cmpestri(__m128i A, int LA, __m128i B, int LB, const int M);\n" |
| 45570 | "/// \\endcode\n" |
| 45571 | "///\n" |
| 45572 | "/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>\n" |
| 45573 | "/// instruction.\n" |
| 45574 | "///\n" |
| 45575 | "/// \\param A\n" |
| 45576 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
| 45577 | "/// compared.\n" |
| 45578 | "/// \\param LA\n" |
| 45579 | "/// An integer that specifies the length of the string in \\a A.\n" |
| 45580 | "/// \\param B\n" |
| 45581 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
| 45582 | "/// compared.\n" |
| 45583 | "/// \\param LB\n" |
| 45584 | "/// An integer that specifies the length of the string in \\a B.\n" |
| 45585 | "/// \\param M\n" |
| 45586 | "/// An 8-bit immediate operand specifying whether the characters are bytes or\n" |
| 45587 | "/// words, the type of comparison to perform, and the format of the return\n" |
| 45588 | "/// value. \\n\n" |
| 45589 | "/// Bits [1:0]: Determine source data format. \\n\n" |
| 45590 | "/// 00: 16 unsigned bytes \\n\n" |
| 45591 | "/// 01: 8 unsigned words \\n\n" |
| 45592 | "/// 10: 16 signed bytes \\n\n" |
| 45593 | "/// 11: 8 signed words \\n\n" |
| 45594 | "/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n" |
| 45595 | "/// 00: Subset: Each character in \\a B is compared for equality with all\n" |
| 45596 | "/// the characters in \\a A. \\n\n" |
| 45597 | "/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n" |
| 45598 | "/// basis is greater than or equal for even-indexed elements in \\a A,\n" |
| 45599 | "/// and less than or equal for odd-indexed elements in \\a A. \\n\n" |
| 45600 | "/// 10: Match: Compare each pair of corresponding characters in \\a A and\n" |
| 45601 | "/// \\a B for equality. \\n\n" |
| 45602 | "/// 11: Substring: Search B for substring matches of \\a A. \\n\n" |
| 45603 | "/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n" |
| 45604 | "/// mask of the comparison results. \\n\n" |
| 45605 | "/// 00: No effect. \\n\n" |
| 45606 | "/// 01: Negate the bit mask. \\n\n" |
| 45607 | "/// 10: No effect. \\n\n" |
| 45608 | "/// 11: Negate the bit mask only for bits with an index less than or equal\n" |
| 45609 | "/// to the size of \\a A or \\a B. \\n\n" |
| 45610 | "/// Bit [6]: Determines whether the index of the lowest set bit or the\n" |
| 45611 | "/// highest set bit is returned. \\n\n" |
| 45612 | "/// 0: The index of the least significant set bit. \\n\n" |
| 45613 | "/// 1: The index of the most significant set bit. \\n\n" |
| 45614 | "/// \\returns Returns an integer representing the result index of the comparison.\n" |
| 45615 | "#define _mm_cmpestri(A, LA, B, LB, M) \\\n" |
| 45616 | " (int)__builtin_ia32_pcmpestri128((__v16qi)(__m128i)(A), (int)(LA), \\\n" |
| 45617 | " (__v16qi)(__m128i)(B), (int)(LB), \\\n" |
| 45618 | " (int)(M))\n" |
| 45619 | "\n" |
| 45620 | "/* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. */\n" |
| 45621 | "/// Uses the immediate operand \\a M to perform a comparison of string\n" |
| 45622 | "/// data with implicitly defined lengths that is contained in source operands\n" |
| 45623 | "/// \\a A and \\a B. Returns 1 if the bit mask is zero and the length of the\n" |
| 45624 | "/// string in \\a B is the maximum, otherwise, returns 0.\n" |
| 45625 | "///\n" |
| 45626 | "/// \\headerfile <x86intrin.h>\n" |
| 45627 | "///\n" |
| 45628 | "/// \\code\n" |
| 45629 | "/// int _mm_cmpistra(__m128i A, __m128i B, const int M);\n" |
| 45630 | "/// \\endcode\n" |
| 45631 | "///\n" |
| 45632 | "/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>\n" |
| 45633 | "/// instruction.\n" |
| 45634 | "///\n" |
| 45635 | "/// \\param A\n" |
| 45636 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
| 45637 | "/// compared.\n" |
| 45638 | "/// \\param B\n" |
| 45639 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
| 45640 | "/// compared.\n" |
| 45641 | "/// \\param M\n" |
| 45642 | "/// An 8-bit immediate operand specifying whether the characters are bytes or\n" |
| 45643 | "/// words and the type of comparison to perform. \\n\n" |
| 45644 | "/// Bits [1:0]: Determine source data format. \\n\n" |
| 45645 | "/// 00: 16 unsigned bytes \\n\n" |
| 45646 | "/// 01: 8 unsigned words \\n\n" |
| 45647 | "/// 10: 16 signed bytes \\n\n" |
| 45648 | "/// 11: 8 signed words \\n\n" |
| 45649 | "/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n" |
| 45650 | "/// 00: Subset: Each character in \\a B is compared for equality with all\n" |
| 45651 | "/// the characters in \\a A. \\n\n" |
| 45652 | "/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n" |
| 45653 | "/// basis is greater than or equal for even-indexed elements in \\a A,\n" |
| 45654 | "/// and less than or equal for odd-indexed elements in \\a A. \\n\n" |
| 45655 | "/// 10: Match: Compare each pair of corresponding characters in \\a A and\n" |
| 45656 | "/// \\a B for equality. \\n\n" |
| 45657 | "/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n" |
| 45658 | "/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n" |
| 45659 | "/// mask of the comparison results. \\n\n" |
| 45660 | "/// 00: No effect. \\n\n" |
| 45661 | "/// 01: Negate the bit mask. \\n\n" |
| 45662 | "/// 10: No effect. \\n\n" |
| 45663 | "/// 11: Negate the bit mask only for bits with an index less than or equal\n" |
| 45664 | "/// to the size of \\a A or \\a B. \\n\n" |
| 45665 | "/// \\returns Returns 1 if the bit mask is zero and the length of the string in\n" |
| 45666 | "/// \\a B is the maximum; otherwise, returns 0.\n" |
| 45667 | "#define _mm_cmpistra(A, B, M) \\\n" |
| 45668 | " (int)__builtin_ia32_pcmpistria128((__v16qi)(__m128i)(A), \\\n" |
| 45669 | " (__v16qi)(__m128i)(B), (int)(M))\n" |
| 45670 | "\n" |
| 45671 | "/// Uses the immediate operand \\a M to perform a comparison of string\n" |
| 45672 | "/// data with implicitly defined lengths that is contained in source operands\n" |
| 45673 | "/// \\a A and \\a B. Returns 1 if the bit mask is non-zero, otherwise, returns\n" |
| 45674 | "/// 0.\n" |
| 45675 | "///\n" |
| 45676 | "/// \\headerfile <x86intrin.h>\n" |
| 45677 | "///\n" |
| 45678 | "/// \\code\n" |
| 45679 | "/// int _mm_cmpistrc(__m128i A, __m128i B, const int M);\n" |
| 45680 | "/// \\endcode\n" |
| 45681 | "///\n" |
| 45682 | "/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>\n" |
| 45683 | "/// instruction.\n" |
| 45684 | "///\n" |
| 45685 | "/// \\param A\n" |
| 45686 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
| 45687 | "/// compared.\n" |
| 45688 | "/// \\param B\n" |
| 45689 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
| 45690 | "/// compared.\n" |
| 45691 | "/// \\param M\n" |
| 45692 | "/// An 8-bit immediate operand specifying whether the characters are bytes or\n" |
| 45693 | "/// words and the type of comparison to perform. \\n\n" |
| 45694 | "/// Bits [1:0]: Determine source data format. \\n\n" |
| 45695 | "/// 00: 16 unsigned bytes \\n\n" |
| 45696 | "/// 01: 8 unsigned words \\n\n" |
| 45697 | "/// 10: 16 signed bytes \\n\n" |
| 45698 | "/// 11: 8 signed words \\n\n" |
| 45699 | "/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n" |
| 45700 | "/// 00: Subset: Each character in \\a B is compared for equality with all\n" |
| 45701 | "/// the characters in \\a A. \\n\n" |
| 45702 | "/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n" |
| 45703 | "/// basis is greater than or equal for even-indexed elements in \\a A,\n" |
| 45704 | "/// and less than or equal for odd-indexed elements in \\a A. \\n\n" |
| 45705 | "/// 10: Match: Compare each pair of corresponding characters in \\a A and\n" |
| 45706 | "/// \\a B for equality. \\n\n" |
| 45707 | "/// 11: Substring: Search B for substring matches of \\a A. \\n\n" |
| 45708 | "/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n" |
| 45709 | "/// mask of the comparison results. \\n\n" |
| 45710 | "/// 00: No effect. \\n\n" |
| 45711 | "/// 01: Negate the bit mask. \\n\n" |
| 45712 | "/// 10: No effect. \\n\n" |
| 45713 | "/// 11: Negate the bit mask only for bits with an index less than or equal\n" |
| 45714 | "/// to the size of \\a A or \\a B.\n" |
| 45715 | "/// \\returns Returns 1 if the bit mask is non-zero, otherwise, returns 0.\n" |
| 45716 | "#define _mm_cmpistrc(A, B, M) \\\n" |
| 45717 | " (int)__builtin_ia32_pcmpistric128((__v16qi)(__m128i)(A), \\\n" |
| 45718 | " (__v16qi)(__m128i)(B), (int)(M))\n" |
| 45719 | "\n" |
| 45720 | "/// Uses the immediate operand \\a M to perform a comparison of string\n" |
| 45721 | "/// data with implicitly defined lengths that is contained in source operands\n" |
| 45722 | "/// \\a A and \\a B. Returns bit 0 of the resulting bit mask.\n" |
| 45723 | "///\n" |
| 45724 | "/// \\headerfile <x86intrin.h>\n" |
| 45725 | "///\n" |
| 45726 | "/// \\code\n" |
| 45727 | "/// int _mm_cmpistro(__m128i A, __m128i B, const int M);\n" |
| 45728 | "/// \\endcode\n" |
| 45729 | "///\n" |
| 45730 | "/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>\n" |
| 45731 | "/// instruction.\n" |
| 45732 | "///\n" |
| 45733 | "/// \\param A\n" |
| 45734 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
| 45735 | "/// compared.\n" |
| 45736 | "/// \\param B\n" |
| 45737 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
| 45738 | "/// compared.\n" |
| 45739 | "/// \\param M\n" |
| 45740 | "/// An 8-bit immediate operand specifying whether the characters are bytes or\n" |
| 45741 | "/// words and the type of comparison to perform. \\n\n" |
| 45742 | "/// Bits [1:0]: Determine source data format. \\n\n" |
| 45743 | "/// 00: 16 unsigned bytes \\n\n" |
| 45744 | "/// 01: 8 unsigned words \\n\n" |
| 45745 | "/// 10: 16 signed bytes \\n\n" |
| 45746 | "/// 11: 8 signed words \\n\n" |
| 45747 | "/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n" |
| 45748 | "/// 00: Subset: Each character in \\a B is compared for equality with all\n" |
| 45749 | "/// the characters in \\a A. \\n\n" |
| 45750 | "/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n" |
| 45751 | "/// basis is greater than or equal for even-indexed elements in \\a A,\n" |
| 45752 | "/// and less than or equal for odd-indexed elements in \\a A. \\n\n" |
| 45753 | "/// 10: Match: Compare each pair of corresponding characters in \\a A and\n" |
| 45754 | "/// \\a B for equality. \\n\n" |
| 45755 | "/// 11: Substring: Search B for substring matches of \\a A. \\n\n" |
| 45756 | "/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n" |
| 45757 | "/// mask of the comparison results. \\n\n" |
| 45758 | "/// 00: No effect. \\n\n" |
| 45759 | "/// 01: Negate the bit mask. \\n\n" |
| 45760 | "/// 10: No effect. \\n\n" |
| 45761 | "/// 11: Negate the bit mask only for bits with an index less than or equal\n" |
| 45762 | "/// to the size of \\a A or \\a B. \\n\n" |
| 45763 | "/// \\returns Returns bit 0 of the resulting bit mask.\n" |
| 45764 | "#define _mm_cmpistro(A, B, M) \\\n" |
| 45765 | " (int)__builtin_ia32_pcmpistrio128((__v16qi)(__m128i)(A), \\\n" |
| 45766 | " (__v16qi)(__m128i)(B), (int)(M))\n" |
| 45767 | "\n" |
| 45768 | "/// Uses the immediate operand \\a M to perform a comparison of string\n" |
| 45769 | "/// data with implicitly defined lengths that is contained in source operands\n" |
| 45770 | "/// \\a A and \\a B. Returns 1 if the length of the string in \\a A is less than\n" |
| 45771 | "/// the maximum, otherwise, returns 0.\n" |
| 45772 | "///\n" |
| 45773 | "/// \\headerfile <x86intrin.h>\n" |
| 45774 | "///\n" |
| 45775 | "/// \\code\n" |
| 45776 | "/// int _mm_cmpistrs(__m128i A, __m128i B, const int M);\n" |
| 45777 | "/// \\endcode\n" |
| 45778 | "///\n" |
| 45779 | "/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>\n" |
| 45780 | "/// instruction.\n" |
| 45781 | "///\n" |
| 45782 | "/// \\param A\n" |
| 45783 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
| 45784 | "/// compared.\n" |
| 45785 | "/// \\param B\n" |
| 45786 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
| 45787 | "/// compared.\n" |
| 45788 | "/// \\param M\n" |
| 45789 | "/// An 8-bit immediate operand specifying whether the characters are bytes or\n" |
| 45790 | "/// words and the type of comparison to perform. \\n\n" |
| 45791 | "/// Bits [1:0]: Determine source data format. \\n\n" |
| 45792 | "/// 00: 16 unsigned bytes \\n\n" |
| 45793 | "/// 01: 8 unsigned words \\n\n" |
| 45794 | "/// 10: 16 signed bytes \\n\n" |
| 45795 | "/// 11: 8 signed words \\n\n" |
| 45796 | "/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n" |
| 45797 | "/// 00: Subset: Each character in \\a B is compared for equality with all\n" |
| 45798 | "/// the characters in \\a A. \\n\n" |
| 45799 | "/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n" |
| 45800 | "/// basis is greater than or equal for even-indexed elements in \\a A,\n" |
| 45801 | "/// and less than or equal for odd-indexed elements in \\a A. \\n\n" |
| 45802 | "/// 10: Match: Compare each pair of corresponding characters in \\a A and\n" |
| 45803 | "/// \\a B for equality. \\n\n" |
| 45804 | "/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n" |
| 45805 | "/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n" |
| 45806 | "/// mask of the comparison results. \\n\n" |
| 45807 | "/// 00: No effect. \\n\n" |
| 45808 | "/// 01: Negate the bit mask. \\n\n" |
| 45809 | "/// 10: No effect. \\n\n" |
| 45810 | "/// 11: Negate the bit mask only for bits with an index less than or equal\n" |
| 45811 | "/// to the size of \\a A or \\a B. \\n\n" |
| 45812 | "/// \\returns Returns 1 if the length of the string in \\a A is less than the\n" |
| 45813 | "/// maximum, otherwise, returns 0.\n" |
| 45814 | "#define _mm_cmpistrs(A, B, M) \\\n" |
| 45815 | " (int)__builtin_ia32_pcmpistris128((__v16qi)(__m128i)(A), \\\n" |
| 45816 | " (__v16qi)(__m128i)(B), (int)(M))\n" |
| 45817 | "\n" |
| 45818 | "/// Uses the immediate operand \\a M to perform a comparison of string\n" |
| 45819 | "/// data with implicitly defined lengths that is contained in source operands\n" |
| 45820 | "/// \\a A and \\a B. Returns 1 if the length of the string in \\a B is less than\n" |
| 45821 | "/// the maximum, otherwise, returns 0.\n" |
| 45822 | "///\n" |
| 45823 | "/// \\headerfile <x86intrin.h>\n" |
| 45824 | "///\n" |
| 45825 | "/// \\code\n" |
| 45826 | "/// int _mm_cmpistrz(__m128i A, __m128i B, const int M);\n" |
| 45827 | "/// \\endcode\n" |
| 45828 | "///\n" |
| 45829 | "/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>\n" |
| 45830 | "/// instruction.\n" |
| 45831 | "///\n" |
| 45832 | "/// \\param A\n" |
| 45833 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
| 45834 | "/// compared.\n" |
| 45835 | "/// \\param B\n" |
| 45836 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
| 45837 | "/// compared.\n" |
| 45838 | "/// \\param M\n" |
| 45839 | "/// An 8-bit immediate operand specifying whether the characters are bytes or\n" |
| 45840 | "/// words and the type of comparison to perform. \\n\n" |
| 45841 | "/// Bits [1:0]: Determine source data format. \\n\n" |
| 45842 | "/// 00: 16 unsigned bytes \\n\n" |
| 45843 | "/// 01: 8 unsigned words \\n\n" |
| 45844 | "/// 10: 16 signed bytes \\n\n" |
| 45845 | "/// 11: 8 signed words \\n\n" |
| 45846 | "/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n" |
| 45847 | "/// 00: Subset: Each character in \\a B is compared for equality with all\n" |
| 45848 | "/// the characters in \\a A. \\n\n" |
| 45849 | "/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n" |
| 45850 | "/// basis is greater than or equal for even-indexed elements in \\a A,\n" |
| 45851 | "/// and less than or equal for odd-indexed elements in \\a A. \\n\n" |
| 45852 | "/// 10: Match: Compare each pair of corresponding characters in \\a A and\n" |
| 45853 | "/// \\a B for equality. \\n\n" |
| 45854 | "/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n" |
| 45855 | "/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n" |
| 45856 | "/// mask of the comparison results. \\n\n" |
| 45857 | "/// 00: No effect. \\n\n" |
| 45858 | "/// 01: Negate the bit mask. \\n\n" |
| 45859 | "/// 10: No effect. \\n\n" |
| 45860 | "/// 11: Negate the bit mask only for bits with an index less than or equal\n" |
| 45861 | "/// to the size of \\a A or \\a B.\n" |
| 45862 | "/// \\returns Returns 1 if the length of the string in \\a B is less than the\n" |
| 45863 | "/// maximum, otherwise, returns 0.\n" |
| 45864 | "#define _mm_cmpistrz(A, B, M) \\\n" |
| 45865 | " (int)__builtin_ia32_pcmpistriz128((__v16qi)(__m128i)(A), \\\n" |
| 45866 | " (__v16qi)(__m128i)(B), (int)(M))\n" |
| 45867 | "\n" |
| 45868 | "/// Uses the immediate operand \\a M to perform a comparison of string\n" |
| 45869 | "/// data with explicitly defined lengths that is contained in source operands\n" |
| 45870 | "/// \\a A and \\a B. Returns 1 if the bit mask is zero and the length of the\n" |
| 45871 | "/// string in \\a B is the maximum, otherwise, returns 0.\n" |
| 45872 | "///\n" |
| 45873 | "/// \\headerfile <x86intrin.h>\n" |
| 45874 | "///\n" |
| 45875 | "/// \\code\n" |
| 45876 | "/// int _mm_cmpestra(__m128i A, int LA, __m128i B, int LB, const int M);\n" |
| 45877 | "/// \\endcode\n" |
| 45878 | "///\n" |
| 45879 | "/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>\n" |
| 45880 | "/// instruction.\n" |
| 45881 | "///\n" |
| 45882 | "/// \\param A\n" |
| 45883 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
| 45884 | "/// compared.\n" |
| 45885 | "/// \\param LA\n" |
| 45886 | "/// An integer that specifies the length of the string in \\a A.\n" |
| 45887 | "/// \\param B\n" |
| 45888 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
| 45889 | "/// compared.\n" |
| 45890 | "/// \\param LB\n" |
| 45891 | "/// An integer that specifies the length of the string in \\a B.\n" |
| 45892 | "/// \\param M\n" |
| 45893 | "/// An 8-bit immediate operand specifying whether the characters are bytes or\n" |
| 45894 | "/// words and the type of comparison to perform. \\n\n" |
| 45895 | "/// Bits [1:0]: Determine source data format. \\n\n" |
| 45896 | "/// 00: 16 unsigned bytes \\n\n" |
| 45897 | "/// 01: 8 unsigned words \\n\n" |
| 45898 | "/// 10: 16 signed bytes \\n\n" |
| 45899 | "/// 11: 8 signed words \\n\n" |
| 45900 | "/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n" |
| 45901 | "/// 00: Subset: Each character in \\a B is compared for equality with all\n" |
| 45902 | "/// the characters in \\a A. \\n\n" |
| 45903 | "/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n" |
| 45904 | "/// basis is greater than or equal for even-indexed elements in \\a A,\n" |
| 45905 | "/// and less than or equal for odd-indexed elements in \\a A. \\n\n" |
| 45906 | "/// 10: Match: Compare each pair of corresponding characters in \\a A and\n" |
| 45907 | "/// \\a B for equality. \\n\n" |
| 45908 | "/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n" |
| 45909 | "/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n" |
| 45910 | "/// mask of the comparison results. \\n\n" |
| 45911 | "/// 00: No effect. \\n\n" |
| 45912 | "/// 01: Negate the bit mask. \\n\n" |
| 45913 | "/// 10: No effect. \\n\n" |
| 45914 | "/// 11: Negate the bit mask only for bits with an index less than or equal\n" |
| 45915 | "/// to the size of \\a A or \\a B.\n" |
| 45916 | "/// \\returns Returns 1 if the bit mask is zero and the length of the string in\n" |
| 45917 | "/// \\a B is the maximum, otherwise, returns 0.\n" |
| 45918 | "#define _mm_cmpestra(A, LA, B, LB, M) \\\n" |
| 45919 | " (int)__builtin_ia32_pcmpestria128((__v16qi)(__m128i)(A), (int)(LA), \\\n" |
| 45920 | " (__v16qi)(__m128i)(B), (int)(LB), \\\n" |
| 45921 | " (int)(M))\n" |
| 45922 | "\n" |
| 45923 | "/// Uses the immediate operand \\a M to perform a comparison of string\n" |
| 45924 | "/// data with explicitly defined lengths that is contained in source operands\n" |
| 45925 | "/// \\a A and \\a B. Returns 1 if the resulting mask is non-zero, otherwise,\n" |
| 45926 | "/// returns 0.\n" |
| 45927 | "///\n" |
| 45928 | "/// \\headerfile <x86intrin.h>\n" |
| 45929 | "///\n" |
| 45930 | "/// \\code\n" |
| 45931 | "/// int _mm_cmpestrc(__m128i A, int LA, __m128i B, int LB, const int M);\n" |
| 45932 | "/// \\endcode\n" |
| 45933 | "///\n" |
| 45934 | "/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>\n" |
| 45935 | "/// instruction.\n" |
| 45936 | "///\n" |
| 45937 | "/// \\param A\n" |
| 45938 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
| 45939 | "/// compared.\n" |
| 45940 | "/// \\param LA\n" |
| 45941 | "/// An integer that specifies the length of the string in \\a A.\n" |
| 45942 | "/// \\param B\n" |
| 45943 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
| 45944 | "/// compared.\n" |
| 45945 | "/// \\param LB\n" |
| 45946 | "/// An integer that specifies the length of the string in \\a B.\n" |
| 45947 | "/// \\param M\n" |
| 45948 | "/// An 8-bit immediate operand specifying whether the characters are bytes or\n" |
| 45949 | "/// words and the type of comparison to perform. \\n\n" |
| 45950 | "/// Bits [1:0]: Determine source data format. \\n\n" |
| 45951 | "/// 00: 16 unsigned bytes \\n\n" |
| 45952 | "/// 01: 8 unsigned words \\n\n" |
| 45953 | "/// 10: 16 signed bytes \\n\n" |
| 45954 | "/// 11: 8 signed words \\n\n" |
| 45955 | "/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n" |
| 45956 | "/// 00: Subset: Each character in \\a B is compared for equality with all\n" |
| 45957 | "/// the characters in \\a A. \\n\n" |
| 45958 | "/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n" |
| 45959 | "/// basis is greater than or equal for even-indexed elements in \\a A,\n" |
| 45960 | "/// and less than or equal for odd-indexed elements in \\a A. \\n\n" |
| 45961 | "/// 10: Match: Compare each pair of corresponding characters in \\a A and\n" |
| 45962 | "/// \\a B for equality. \\n\n" |
| 45963 | "/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n" |
| 45964 | "/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n" |
| 45965 | "/// mask of the comparison results. \\n\n" |
| 45966 | "/// 00: No effect. \\n\n" |
| 45967 | "/// 01: Negate the bit mask. \\n\n" |
| 45968 | "/// 10: No effect. \\n\n" |
| 45969 | "/// 11: Negate the bit mask only for bits with an index less than or equal\n" |
| 45970 | "/// to the size of \\a A or \\a B. \\n\n" |
| 45971 | "/// \\returns Returns 1 if the resulting mask is non-zero, otherwise, returns 0.\n" |
| 45972 | "#define _mm_cmpestrc(A, LA, B, LB, M) \\\n" |
| 45973 | " (int)__builtin_ia32_pcmpestric128((__v16qi)(__m128i)(A), (int)(LA), \\\n" |
| 45974 | " (__v16qi)(__m128i)(B), (int)(LB), \\\n" |
| 45975 | " (int)(M))\n" |
| 45976 | "\n" |
| 45977 | "/// Uses the immediate operand \\a M to perform a comparison of string\n" |
| 45978 | "/// data with explicitly defined lengths that is contained in source operands\n" |
| 45979 | "/// \\a A and \\a B. Returns bit 0 of the resulting bit mask.\n" |
| 45980 | "///\n" |
| 45981 | "/// \\headerfile <x86intrin.h>\n" |
| 45982 | "///\n" |
| 45983 | "/// \\code\n" |
| 45984 | "/// int _mm_cmpestro(__m128i A, int LA, __m128i B, int LB, const int M);\n" |
| 45985 | "/// \\endcode\n" |
| 45986 | "///\n" |
| 45987 | "/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>\n" |
| 45988 | "/// instruction.\n" |
| 45989 | "///\n" |
| 45990 | "/// \\param A\n" |
| 45991 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
| 45992 | "/// compared.\n" |
| 45993 | "/// \\param LA\n" |
| 45994 | "/// An integer that specifies the length of the string in \\a A.\n" |
| 45995 | "/// \\param B\n" |
| 45996 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
| 45997 | "/// compared.\n" |
| 45998 | "/// \\param LB\n" |
| 45999 | "/// An integer that specifies the length of the string in \\a B.\n" |
| 46000 | "/// \\param M\n" |
| 46001 | "/// An 8-bit immediate operand specifying whether the characters are bytes or\n" |
| 46002 | "/// words and the type of comparison to perform. \\n\n" |
| 46003 | "/// Bits [1:0]: Determine source data format. \\n\n" |
| 46004 | "/// 00: 16 unsigned bytes \\n\n" |
| 46005 | "/// 01: 8 unsigned words \\n\n" |
| 46006 | "/// 10: 16 signed bytes \\n\n" |
| 46007 | "/// 11: 8 signed words \\n\n" |
| 46008 | "/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n" |
| 46009 | "/// 00: Subset: Each character in \\a B is compared for equality with all\n" |
| 46010 | "/// the characters in \\a A. \\n\n" |
| 46011 | "/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n" |
| 46012 | "/// basis is greater than or equal for even-indexed elements in \\a A,\n" |
| 46013 | "/// and less than or equal for odd-indexed elements in \\a A. \\n\n" |
| 46014 | "/// 10: Match: Compare each pair of corresponding characters in \\a A and\n" |
| 46015 | "/// \\a B for equality. \\n\n" |
| 46016 | "/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n" |
| 46017 | "/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n" |
| 46018 | "/// mask of the comparison results. \\n\n" |
| 46019 | "/// 00: No effect. \\n\n" |
| 46020 | "/// 01: Negate the bit mask. \\n\n" |
| 46021 | "/// 10: No effect. \\n\n" |
| 46022 | "/// 11: Negate the bit mask only for bits with an index less than or equal\n" |
| 46023 | "/// to the size of \\a A or \\a B.\n" |
| 46024 | "/// \\returns Returns bit 0 of the resulting bit mask.\n" |
| 46025 | "#define _mm_cmpestro(A, LA, B, LB, M) \\\n" |
| 46026 | " (int)__builtin_ia32_pcmpestrio128((__v16qi)(__m128i)(A), (int)(LA), \\\n" |
| 46027 | " (__v16qi)(__m128i)(B), (int)(LB), \\\n" |
| 46028 | " (int)(M))\n" |
| 46029 | "\n" |
| 46030 | "/// Uses the immediate operand \\a M to perform a comparison of string\n" |
| 46031 | "/// data with explicitly defined lengths that is contained in source operands\n" |
| 46032 | "/// \\a A and \\a B. Returns 1 if the length of the string in \\a A is less than\n" |
| 46033 | "/// the maximum, otherwise, returns 0.\n" |
| 46034 | "///\n" |
| 46035 | "/// \\headerfile <x86intrin.h>\n" |
| 46036 | "///\n" |
| 46037 | "/// \\code\n" |
| 46038 | "/// int _mm_cmpestrs(__m128i A, int LA, __m128i B, int LB, const int M);\n" |
| 46039 | "/// \\endcode\n" |
| 46040 | "///\n" |
| 46041 | "/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>\n" |
| 46042 | "/// instruction.\n" |
| 46043 | "///\n" |
| 46044 | "/// \\param A\n" |
| 46045 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
| 46046 | "/// compared.\n" |
| 46047 | "/// \\param LA\n" |
| 46048 | "/// An integer that specifies the length of the string in \\a A.\n" |
| 46049 | "/// \\param B\n" |
| 46050 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
| 46051 | "/// compared.\n" |
| 46052 | "/// \\param LB\n" |
| 46053 | "/// An integer that specifies the length of the string in \\a B.\n" |
| 46054 | "/// \\param M\n" |
| 46055 | "/// An 8-bit immediate operand specifying whether the characters are bytes or\n" |
| 46056 | "/// words and the type of comparison to perform. \\n\n" |
| 46057 | "/// Bits [1:0]: Determine source data format. \\n\n" |
| 46058 | "/// 00: 16 unsigned bytes \\n\n" |
| 46059 | "/// 01: 8 unsigned words \\n\n" |
| 46060 | "/// 10: 16 signed bytes \\n\n" |
| 46061 | "/// 11: 8 signed words \\n\n" |
| 46062 | "/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n" |
| 46063 | "/// 00: Subset: Each character in \\a B is compared for equality with all\n" |
| 46064 | "/// the characters in \\a A. \\n\n" |
| 46065 | "/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n" |
| 46066 | "/// basis is greater than or equal for even-indexed elements in \\a A,\n" |
| 46067 | "/// and less than or equal for odd-indexed elements in \\a A. \\n\n" |
| 46068 | "/// 10: Match: Compare each pair of corresponding characters in \\a A and\n" |
| 46069 | "/// \\a B for equality. \\n\n" |
| 46070 | "/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n" |
| 46071 | "/// Bits [5:4]: Determine whether to perform a one's complement in the bit\n" |
| 46072 | "/// mask of the comparison results. \\n\n" |
| 46073 | "/// 00: No effect. \\n\n" |
| 46074 | "/// 01: Negate the bit mask. \\n\n" |
| 46075 | "/// 10: No effect. \\n\n" |
| 46076 | "/// 11: Negate the bit mask only for bits with an index less than or equal\n" |
| 46077 | "/// to the size of \\a A or \\a B. \\n\n" |
| 46078 | "/// \\returns Returns 1 if the length of the string in \\a A is less than the\n" |
| 46079 | "/// maximum, otherwise, returns 0.\n" |
| 46080 | "#define _mm_cmpestrs(A, LA, B, LB, M) \\\n" |
| 46081 | " (int)__builtin_ia32_pcmpestris128((__v16qi)(__m128i)(A), (int)(LA), \\\n" |
| 46082 | " (__v16qi)(__m128i)(B), (int)(LB), \\\n" |
| 46083 | " (int)(M))\n" |
| 46084 | "\n" |
| 46085 | "/// Uses the immediate operand \\a M to perform a comparison of string\n" |
| 46086 | "/// data with explicitly defined lengths that is contained in source operands\n" |
| 46087 | "/// \\a A and \\a B. Returns 1 if the length of the string in \\a B is less than\n" |
| 46088 | "/// the maximum, otherwise, returns 0.\n" |
| 46089 | "///\n" |
| 46090 | "/// \\headerfile <x86intrin.h>\n" |
| 46091 | "///\n" |
| 46092 | "/// \\code\n" |
| 46093 | "/// int _mm_cmpestrz(__m128i A, int LA, __m128i B, int LB, const int M);\n" |
| 46094 | "/// \\endcode\n" |
| 46095 | "///\n" |
| 46096 | "/// This intrinsic corresponds to the <c> VPCMPESTRI </c> instruction.\n" |
| 46097 | "///\n" |
| 46098 | "/// \\param A\n" |
| 46099 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
| 46100 | "/// compared.\n" |
| 46101 | "/// \\param LA\n" |
| 46102 | "/// An integer that specifies the length of the string in \\a A.\n" |
| 46103 | "/// \\param B\n" |
| 46104 | "/// A 128-bit integer vector containing one of the source operands to be\n" |
| 46105 | "/// compared.\n" |
| 46106 | "/// \\param LB\n" |
| 46107 | "/// An integer that specifies the length of the string in \\a B.\n" |
| 46108 | "/// \\param M\n" |
| 46109 | "/// An 8-bit immediate operand specifying whether the characters are bytes or\n" |
| 46110 | "/// words and the type of comparison to perform. \\n\n" |
| 46111 | "/// Bits [1:0]: Determine source data format. \\n\n" |
| 46112 | "/// 00: 16 unsigned bytes \\n\n" |
| 46113 | "/// 01: 8 unsigned words \\n\n" |
| 46114 | "/// 10: 16 signed bytes \\n\n" |
| 46115 | "/// 11: 8 signed words \\n\n" |
| 46116 | "/// Bits [3:2]: Determine comparison type and aggregation method. \\n\n" |
| 46117 | "/// 00: Subset: Each character in \\a B is compared for equality with all\n" |
| 46118 | "/// the characters in \\a A. \\n\n" |
| 46119 | "/// 01: Ranges: Each character in \\a B is compared to \\a A. The comparison\n" |
| 46120 | "/// basis is greater than or equal for even-indexed elements in \\a A,\n" |
| 46121 | "/// and less than or equal for odd-indexed elements in \\a A. \\n\n" |
| 46122 | "/// 10: Match: Compare each pair of corresponding characters in \\a A and\n" |
| 46123 | "/// \\a B for equality. \\n\n" |
| 46124 | "/// 11: Substring: Search \\a B for substring matches of \\a A. \\n\n" |
| 46125 | "/// Bits [5:4]: Determine whether to perform a one's complement on the bit\n" |
| 46126 | "/// mask of the comparison results. \\n\n" |
| 46127 | "/// 00: No effect. \\n\n" |
| 46128 | "/// 01: Negate the bit mask. \\n\n" |
| 46129 | "/// 10: No effect. \\n\n" |
| 46130 | "/// 11: Negate the bit mask only for bits with an index less than or equal\n" |
| 46131 | "/// to the size of \\a A or \\a B.\n" |
| 46132 | "/// \\returns Returns 1 if the length of the string in \\a B is less than the\n" |
| 46133 | "/// maximum, otherwise, returns 0.\n" |
| 46134 | "#define _mm_cmpestrz(A, LA, B, LB, M) \\\n" |
| 46135 | " (int)__builtin_ia32_pcmpestriz128((__v16qi)(__m128i)(A), (int)(LA), \\\n" |
| 46136 | " (__v16qi)(__m128i)(B), (int)(LB), \\\n" |
| 46137 | " (int)(M))\n" |
| 46138 | "\n" |
| 46139 | "/* SSE4.2 Compare Packed Data -- Greater Than. */\n" |
| 46140 | "/// Compares each of the corresponding 64-bit values of the 128-bit\n" |
| 46141 | "/// integer vectors to determine if the values in the first operand are\n" |
| 46142 | "/// greater than those in the second operand.\n" |
| 46143 | "///\n" |
| 46144 | "/// \\headerfile <x86intrin.h>\n" |
| 46145 | "///\n" |
| 46146 | "/// This intrinsic corresponds to the <c> VPCMPGTQ / PCMPGTQ </c> instruction.\n" |
| 46147 | "///\n" |
| 46148 | "/// \\param __V1\n" |
| 46149 | "/// A 128-bit integer vector.\n" |
| 46150 | "/// \\param __V2\n" |
| 46151 | "/// A 128-bit integer vector.\n" |
| 46152 | "/// \\returns A 128-bit integer vector containing the comparison results.\n" |
| 46153 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 46154 | "_mm_cmpgt_epi64(__m128i __V1, __m128i __V2)\n" |
| 46155 | "{\n" |
| 46156 | " return (__m128i)((__v2di)__V1 > (__v2di)__V2);\n" |
| 46157 | "}\n" |
| 46158 | "\n" |
| 46159 | "/* SSE4.2 Accumulate CRC32. */\n" |
| 46160 | "/// Adds the unsigned integer operand to the CRC-32C checksum of the\n" |
| 46161 | "/// unsigned char operand.\n" |
| 46162 | "///\n" |
| 46163 | "/// \\headerfile <x86intrin.h>\n" |
| 46164 | "///\n" |
| 46165 | "/// This intrinsic corresponds to the <c> CRC32B </c> instruction.\n" |
| 46166 | "///\n" |
| 46167 | "/// \\param __C\n" |
| 46168 | "/// An unsigned integer operand to add to the CRC-32C checksum of operand\n" |
| 46169 | "/// \\a __D.\n" |
| 46170 | "/// \\param __D\n" |
| 46171 | "/// An unsigned 8-bit integer operand used to compute the CRC-32C checksum.\n" |
| 46172 | "/// \\returns The result of adding operand \\a __C to the CRC-32C checksum of\n" |
| 46173 | "/// operand \\a __D.\n" |
| 46174 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
| 46175 | "_mm_crc32_u8(unsigned int __C, unsigned char __D)\n" |
| 46176 | "{\n" |
| 46177 | " return __builtin_ia32_crc32qi(__C, __D);\n" |
| 46178 | "}\n" |
| 46179 | "\n" |
| 46180 | "/// Adds the unsigned integer operand to the CRC-32C checksum of the\n" |
| 46181 | "/// unsigned short operand.\n" |
| 46182 | "///\n" |
| 46183 | "/// \\headerfile <x86intrin.h>\n" |
| 46184 | "///\n" |
| 46185 | "/// This intrinsic corresponds to the <c> CRC32W </c> instruction.\n" |
| 46186 | "///\n" |
| 46187 | "/// \\param __C\n" |
| 46188 | "/// An unsigned integer operand to add to the CRC-32C checksum of operand\n" |
| 46189 | "/// \\a __D.\n" |
| 46190 | "/// \\param __D\n" |
| 46191 | "/// An unsigned 16-bit integer operand used to compute the CRC-32C checksum.\n" |
| 46192 | "/// \\returns The result of adding operand \\a __C to the CRC-32C checksum of\n" |
| 46193 | "/// operand \\a __D.\n" |
| 46194 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
| 46195 | "_mm_crc32_u16(unsigned int __C, unsigned short __D)\n" |
| 46196 | "{\n" |
| 46197 | " return __builtin_ia32_crc32hi(__C, __D);\n" |
| 46198 | "}\n" |
| 46199 | "\n" |
| 46200 | "/// Adds the first unsigned integer operand to the CRC-32C checksum of\n" |
| 46201 | "/// the second unsigned integer operand.\n" |
| 46202 | "///\n" |
| 46203 | "/// \\headerfile <x86intrin.h>\n" |
| 46204 | "///\n" |
| 46205 | "/// This intrinsic corresponds to the <c> CRC32L </c> instruction.\n" |
| 46206 | "///\n" |
| 46207 | "/// \\param __C\n" |
| 46208 | "/// An unsigned integer operand to add to the CRC-32C checksum of operand\n" |
| 46209 | "/// \\a __D.\n" |
| 46210 | "/// \\param __D\n" |
| 46211 | "/// An unsigned 32-bit integer operand used to compute the CRC-32C checksum.\n" |
| 46212 | "/// \\returns The result of adding operand \\a __C to the CRC-32C checksum of\n" |
| 46213 | "/// operand \\a __D.\n" |
| 46214 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
| 46215 | "_mm_crc32_u32(unsigned int __C, unsigned int __D)\n" |
| 46216 | "{\n" |
| 46217 | " return __builtin_ia32_crc32si(__C, __D);\n" |
| 46218 | "}\n" |
| 46219 | "\n" |
| 46220 | "#ifdef __x86_64__\n" |
| 46221 | "/// Adds the unsigned integer operand to the CRC-32C checksum of the\n" |
| 46222 | "/// unsigned 64-bit integer operand.\n" |
| 46223 | "///\n" |
| 46224 | "/// \\headerfile <x86intrin.h>\n" |
| 46225 | "///\n" |
| 46226 | "/// This intrinsic corresponds to the <c> CRC32Q </c> instruction.\n" |
| 46227 | "///\n" |
| 46228 | "/// \\param __C\n" |
| 46229 | "/// An unsigned integer operand to add to the CRC-32C checksum of operand\n" |
| 46230 | "/// \\a __D.\n" |
| 46231 | "/// \\param __D\n" |
| 46232 | "/// An unsigned 64-bit integer operand used to compute the CRC-32C checksum.\n" |
| 46233 | "/// \\returns The result of adding operand \\a __C to the CRC-32C checksum of\n" |
| 46234 | "/// operand \\a __D.\n" |
| 46235 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
| 46236 | "_mm_crc32_u64(unsigned long long __C, unsigned long long __D)\n" |
| 46237 | "{\n" |
| 46238 | " return __builtin_ia32_crc32di(__C, __D);\n" |
| 46239 | "}\n" |
| 46240 | "#endif /* __x86_64__ */\n" |
| 46241 | "\n" |
| 46242 | "#undef __DEFAULT_FN_ATTRS\n" |
| 46243 | "\n" |
| 46244 | "#include <popcntintrin.h>\n" |
| 46245 | "\n" |
| 46246 | "#endif /* __SMMINTRIN_H */\n" |
| 46247 | "" } , |
| 46248 | { "/builtins/stdalign.h" , "/*===---- stdalign.h - Standard header for alignment ------------------------===\n" |
| 46249 | " *\n" |
| 46250 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 46251 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 46252 | " * in the Software without restriction, including without limitation the rights\n" |
| 46253 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 46254 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 46255 | " * furnished to do so, subject to the following conditions:\n" |
| 46256 | " *\n" |
| 46257 | " * The above copyright notice and this permission notice shall be included in\n" |
| 46258 | " * all copies or substantial portions of the Software.\n" |
| 46259 | " *\n" |
| 46260 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 46261 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 46262 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 46263 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 46264 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 46265 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 46266 | " * THE SOFTWARE.\n" |
| 46267 | " *\n" |
| 46268 | " *===-----------------------------------------------------------------------===\n" |
| 46269 | " */\n" |
| 46270 | "\n" |
| 46271 | "#ifndef __STDALIGN_H\n" |
| 46272 | "#define __STDALIGN_H\n" |
| 46273 | "\n" |
| 46274 | "#ifndef __cplusplus\n" |
| 46275 | "#define alignas _Alignas\n" |
| 46276 | "#define alignof _Alignof\n" |
| 46277 | "#endif\n" |
| 46278 | "\n" |
| 46279 | "#define __alignas_is_defined 1\n" |
| 46280 | "#define __alignof_is_defined 1\n" |
| 46281 | "\n" |
| 46282 | "#endif /* __STDALIGN_H */\n" |
| 46283 | "" } , |
| 46284 | { "/builtins/stdarg.h" , "/*===---- stdarg.h - Variable argument handling ----------------------------===\n" |
| 46285 | " *\n" |
| 46286 | " * Copyright (c) 2008 Eli Friedman\n" |
| 46287 | " *\n" |
| 46288 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 46289 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 46290 | " * in the Software without restriction, including without limitation the rights\n" |
| 46291 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 46292 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 46293 | " * furnished to do so, subject to the following conditions:\n" |
| 46294 | " *\n" |
| 46295 | " * The above copyright notice and this permission notice shall be included in\n" |
| 46296 | " * all copies or substantial portions of the Software.\n" |
| 46297 | " *\n" |
| 46298 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 46299 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 46300 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 46301 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 46302 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 46303 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 46304 | " * THE SOFTWARE.\n" |
| 46305 | " *\n" |
| 46306 | " *===-----------------------------------------------------------------------===\n" |
| 46307 | " */\n" |
| 46308 | "\n" |
| 46309 | "#ifndef __STDARG_H\n" |
| 46310 | "#define __STDARG_H\n" |
| 46311 | "\n" |
| 46312 | "#ifndef _VA_LIST\n" |
| 46313 | "typedef __builtin_va_list va_list;\n" |
| 46314 | "#define _VA_LIST\n" |
| 46315 | "#endif\n" |
| 46316 | "#define va_start(ap, param) __builtin_va_start(ap, param)\n" |
| 46317 | "#define va_end(ap) __builtin_va_end(ap)\n" |
| 46318 | "#define va_arg(ap, type) __builtin_va_arg(ap, type)\n" |
| 46319 | "\n" |
| 46320 | "/* GCC always defines __va_copy, but does not define va_copy unless in c99 mode\n" |
| 46321 | " * or -ansi is not specified, since it was not part of C90.\n" |
| 46322 | " */\n" |
| 46323 | "#define __va_copy(d,s) __builtin_va_copy(d,s)\n" |
| 46324 | "\n" |
| 46325 | "#if __STDC_VERSION__ >= 199901L || __cplusplus >= 201103L || !defined(__STRICT_ANSI__)\n" |
| 46326 | "#define va_copy(dest, src) __builtin_va_copy(dest, src)\n" |
| 46327 | "#endif\n" |
| 46328 | "\n" |
| 46329 | "#ifndef __GNUC_VA_LIST\n" |
| 46330 | "#define __GNUC_VA_LIST 1\n" |
| 46331 | "typedef __builtin_va_list __gnuc_va_list;\n" |
| 46332 | "#endif\n" |
| 46333 | "\n" |
| 46334 | "#endif /* __STDARG_H */\n" |
| 46335 | "" } , |
| 46336 | { "/builtins/stdatomic.h" , "/*===---- stdatomic.h - Standard header for atomic types and operations -----===\n" |
| 46337 | " *\n" |
| 46338 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 46339 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 46340 | " * in the Software without restriction, including without limitation the rights\n" |
| 46341 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 46342 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 46343 | " * furnished to do so, subject to the following conditions:\n" |
| 46344 | " *\n" |
| 46345 | " * The above copyright notice and this permission notice shall be included in\n" |
| 46346 | " * all copies or substantial portions of the Software.\n" |
| 46347 | " *\n" |
| 46348 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 46349 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 46350 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 46351 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 46352 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 46353 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 46354 | " * THE SOFTWARE.\n" |
| 46355 | " *\n" |
| 46356 | " *===-----------------------------------------------------------------------===\n" |
| 46357 | " */\n" |
| 46358 | "\n" |
| 46359 | "#ifndef __CLANG_STDATOMIC_H\n" |
| 46360 | "#define __CLANG_STDATOMIC_H\n" |
| 46361 | "\n" |
| 46362 | "/* If we're hosted, fall back to the system's stdatomic.h. FreeBSD, for\n" |
| 46363 | " * example, already has a Clang-compatible stdatomic.h header.\n" |
| 46364 | " */\n" |
| 46365 | "#if __STDC_HOSTED__ && __has_include_next(<stdatomic.h>)\n" |
| 46366 | "# include_next <stdatomic.h>\n" |
| 46367 | "#else\n" |
| 46368 | "\n" |
| 46369 | "#include <stddef.h>\n" |
| 46370 | "#include <stdint.h>\n" |
| 46371 | "\n" |
| 46372 | "#ifdef __cplusplus\n" |
| 46373 | "extern \"C\" {\n" |
| 46374 | "#endif\n" |
| 46375 | "\n" |
| 46376 | "/* 7.17.1 Introduction */\n" |
| 46377 | "\n" |
| 46378 | "#define ATOMIC_BOOL_LOCK_FREE __CLANG_ATOMIC_BOOL_LOCK_FREE\n" |
| 46379 | "#define ATOMIC_CHAR_LOCK_FREE __CLANG_ATOMIC_CHAR_LOCK_FREE\n" |
| 46380 | "#define ATOMIC_CHAR16_T_LOCK_FREE __CLANG_ATOMIC_CHAR16_T_LOCK_FREE\n" |
| 46381 | "#define ATOMIC_CHAR32_T_LOCK_FREE __CLANG_ATOMIC_CHAR32_T_LOCK_FREE\n" |
| 46382 | "#define ATOMIC_WCHAR_T_LOCK_FREE __CLANG_ATOMIC_WCHAR_T_LOCK_FREE\n" |
| 46383 | "#define ATOMIC_SHORT_LOCK_FREE __CLANG_ATOMIC_SHORT_LOCK_FREE\n" |
| 46384 | "#define ATOMIC_INT_LOCK_FREE __CLANG_ATOMIC_INT_LOCK_FREE\n" |
| 46385 | "#define ATOMIC_LONG_LOCK_FREE __CLANG_ATOMIC_LONG_LOCK_FREE\n" |
| 46386 | "#define ATOMIC_LLONG_LOCK_FREE __CLANG_ATOMIC_LLONG_LOCK_FREE\n" |
| 46387 | "#define ATOMIC_POINTER_LOCK_FREE __CLANG_ATOMIC_POINTER_LOCK_FREE\n" |
| 46388 | "\n" |
| 46389 | "/* 7.17.2 Initialization */\n" |
| 46390 | "\n" |
| 46391 | "#define ATOMIC_VAR_INIT(value) (value)\n" |
| 46392 | "#define atomic_init __c11_atomic_init\n" |
| 46393 | "\n" |
| 46394 | "/* 7.17.3 Order and consistency */\n" |
| 46395 | "\n" |
| 46396 | "typedef enum memory_order {\n" |
| 46397 | " memory_order_relaxed = __ATOMIC_RELAXED,\n" |
| 46398 | " memory_order_consume = __ATOMIC_CONSUME,\n" |
| 46399 | " memory_order_acquire = __ATOMIC_ACQUIRE,\n" |
| 46400 | " memory_order_release = __ATOMIC_RELEASE,\n" |
| 46401 | " memory_order_acq_rel = __ATOMIC_ACQ_REL,\n" |
| 46402 | " memory_order_seq_cst = __ATOMIC_SEQ_CST\n" |
| 46403 | "} memory_order;\n" |
| 46404 | "\n" |
| 46405 | "#define kill_dependency(y) (y)\n" |
| 46406 | "\n" |
| 46407 | "/* 7.17.4 Fences */\n" |
| 46408 | "\n" |
| 46409 | "/* These should be provided by the libc implementation. */\n" |
| 46410 | "void atomic_thread_fence(memory_order);\n" |
| 46411 | "void atomic_signal_fence(memory_order);\n" |
| 46412 | "\n" |
| 46413 | "#define atomic_thread_fence(order) __c11_atomic_thread_fence(order)\n" |
| 46414 | "#define atomic_signal_fence(order) __c11_atomic_signal_fence(order)\n" |
| 46415 | "\n" |
| 46416 | "/* 7.17.5 Lock-free property */\n" |
| 46417 | "\n" |
| 46418 | "#define atomic_is_lock_free(obj) __c11_atomic_is_lock_free(sizeof(*(obj)))\n" |
| 46419 | "\n" |
| 46420 | "/* 7.17.6 Atomic integer types */\n" |
| 46421 | "\n" |
| 46422 | "#ifdef __cplusplus\n" |
| 46423 | "typedef _Atomic(bool) atomic_bool;\n" |
| 46424 | "#else\n" |
| 46425 | "typedef _Atomic(_Bool) atomic_bool;\n" |
| 46426 | "#endif\n" |
| 46427 | "typedef _Atomic(char) atomic_char;\n" |
| 46428 | "typedef _Atomic(signed char) atomic_schar;\n" |
| 46429 | "typedef _Atomic(unsigned char) atomic_uchar;\n" |
| 46430 | "typedef _Atomic(short) atomic_short;\n" |
| 46431 | "typedef _Atomic(unsigned short) atomic_ushort;\n" |
| 46432 | "typedef _Atomic(int) atomic_int;\n" |
| 46433 | "typedef _Atomic(unsigned int) atomic_uint;\n" |
| 46434 | "typedef _Atomic(long) atomic_long;\n" |
| 46435 | "typedef _Atomic(unsigned long) atomic_ulong;\n" |
| 46436 | "typedef _Atomic(long long) atomic_llong;\n" |
| 46437 | "typedef _Atomic(unsigned long long) atomic_ullong;\n" |
| 46438 | "typedef _Atomic(uint_least16_t) atomic_char16_t;\n" |
| 46439 | "typedef _Atomic(uint_least32_t) atomic_char32_t;\n" |
| 46440 | "typedef _Atomic(wchar_t) atomic_wchar_t;\n" |
| 46441 | "typedef _Atomic(int_least8_t) atomic_int_least8_t;\n" |
| 46442 | "typedef _Atomic(uint_least8_t) atomic_uint_least8_t;\n" |
| 46443 | "typedef _Atomic(int_least16_t) atomic_int_least16_t;\n" |
| 46444 | "typedef _Atomic(uint_least16_t) atomic_uint_least16_t;\n" |
| 46445 | "typedef _Atomic(int_least32_t) atomic_int_least32_t;\n" |
| 46446 | "typedef _Atomic(uint_least32_t) atomic_uint_least32_t;\n" |
| 46447 | "typedef _Atomic(int_least64_t) atomic_int_least64_t;\n" |
| 46448 | "typedef _Atomic(uint_least64_t) atomic_uint_least64_t;\n" |
| 46449 | "typedef _Atomic(int_fast8_t) atomic_int_fast8_t;\n" |
| 46450 | "typedef _Atomic(uint_fast8_t) atomic_uint_fast8_t;\n" |
| 46451 | "typedef _Atomic(int_fast16_t) atomic_int_fast16_t;\n" |
| 46452 | "typedef _Atomic(uint_fast16_t) atomic_uint_fast16_t;\n" |
| 46453 | "typedef _Atomic(int_fast32_t) atomic_int_fast32_t;\n" |
| 46454 | "typedef _Atomic(uint_fast32_t) atomic_uint_fast32_t;\n" |
| 46455 | "typedef _Atomic(int_fast64_t) atomic_int_fast64_t;\n" |
| 46456 | "typedef _Atomic(uint_fast64_t) atomic_uint_fast64_t;\n" |
| 46457 | "typedef _Atomic(intptr_t) atomic_intptr_t;\n" |
| 46458 | "typedef _Atomic(uintptr_t) atomic_uintptr_t;\n" |
| 46459 | "typedef _Atomic(size_t) atomic_size_t;\n" |
| 46460 | "typedef _Atomic(ptrdiff_t) atomic_ptrdiff_t;\n" |
| 46461 | "typedef _Atomic(intmax_t) atomic_intmax_t;\n" |
| 46462 | "typedef _Atomic(uintmax_t) atomic_uintmax_t;\n" |
| 46463 | "\n" |
| 46464 | "/* 7.17.7 Operations on atomic types */\n" |
| 46465 | "\n" |
| 46466 | "#define atomic_store(object, desired) __c11_atomic_store(object, desired, __ATOMIC_SEQ_CST)\n" |
| 46467 | "#define atomic_store_explicit __c11_atomic_store\n" |
| 46468 | "\n" |
| 46469 | "#define atomic_load(object) __c11_atomic_load(object, __ATOMIC_SEQ_CST)\n" |
| 46470 | "#define atomic_load_explicit __c11_atomic_load\n" |
| 46471 | "\n" |
| 46472 | "#define atomic_exchange(object, desired) __c11_atomic_exchange(object, desired, __ATOMIC_SEQ_CST)\n" |
| 46473 | "#define atomic_exchange_explicit __c11_atomic_exchange\n" |
| 46474 | "\n" |
| 46475 | "#define atomic_compare_exchange_strong(object, expected, desired) __c11_atomic_compare_exchange_strong(object, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)\n" |
| 46476 | "#define atomic_compare_exchange_strong_explicit __c11_atomic_compare_exchange_strong\n" |
| 46477 | "\n" |
| 46478 | "#define atomic_compare_exchange_weak(object, expected, desired) __c11_atomic_compare_exchange_weak(object, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)\n" |
| 46479 | "#define atomic_compare_exchange_weak_explicit __c11_atomic_compare_exchange_weak\n" |
| 46480 | "\n" |
| 46481 | "#define atomic_fetch_add(object, operand) __c11_atomic_fetch_add(object, operand, __ATOMIC_SEQ_CST)\n" |
| 46482 | "#define atomic_fetch_add_explicit __c11_atomic_fetch_add\n" |
| 46483 | "\n" |
| 46484 | "#define atomic_fetch_sub(object, operand) __c11_atomic_fetch_sub(object, operand, __ATOMIC_SEQ_CST)\n" |
| 46485 | "#define atomic_fetch_sub_explicit __c11_atomic_fetch_sub\n" |
| 46486 | "\n" |
| 46487 | "#define atomic_fetch_or(object, operand) __c11_atomic_fetch_or(object, operand, __ATOMIC_SEQ_CST)\n" |
| 46488 | "#define atomic_fetch_or_explicit __c11_atomic_fetch_or\n" |
| 46489 | "\n" |
| 46490 | "#define atomic_fetch_xor(object, operand) __c11_atomic_fetch_xor(object, operand, __ATOMIC_SEQ_CST)\n" |
| 46491 | "#define atomic_fetch_xor_explicit __c11_atomic_fetch_xor\n" |
| 46492 | "\n" |
| 46493 | "#define atomic_fetch_and(object, operand) __c11_atomic_fetch_and(object, operand, __ATOMIC_SEQ_CST)\n" |
| 46494 | "#define atomic_fetch_and_explicit __c11_atomic_fetch_and\n" |
| 46495 | "\n" |
| 46496 | "/* 7.17.8 Atomic flag type and operations */\n" |
| 46497 | "\n" |
| 46498 | "typedef struct atomic_flag { atomic_bool _Value; } atomic_flag;\n" |
| 46499 | "\n" |
| 46500 | "#define ATOMIC_FLAG_INIT { 0 }\n" |
| 46501 | "\n" |
| 46502 | "/* These should be provided by the libc implementation. */\n" |
| 46503 | "#ifdef __cplusplus\n" |
| 46504 | "bool atomic_flag_test_and_set(volatile atomic_flag *);\n" |
| 46505 | "bool atomic_flag_test_and_set_explicit(volatile atomic_flag *, memory_order);\n" |
| 46506 | "#else\n" |
| 46507 | "_Bool atomic_flag_test_and_set(volatile atomic_flag *);\n" |
| 46508 | "_Bool atomic_flag_test_and_set_explicit(volatile atomic_flag *, memory_order);\n" |
| 46509 | "#endif\n" |
| 46510 | "void atomic_flag_clear(volatile atomic_flag *);\n" |
| 46511 | "void atomic_flag_clear_explicit(volatile atomic_flag *, memory_order);\n" |
| 46512 | "\n" |
| 46513 | "#define atomic_flag_test_and_set(object) __c11_atomic_exchange(&(object)->_Value, 1, __ATOMIC_SEQ_CST)\n" |
| 46514 | "#define atomic_flag_test_and_set_explicit(object, order) __c11_atomic_exchange(&(object)->_Value, 1, order)\n" |
| 46515 | "\n" |
| 46516 | "#define atomic_flag_clear(object) __c11_atomic_store(&(object)->_Value, 0, __ATOMIC_SEQ_CST)\n" |
| 46517 | "#define atomic_flag_clear_explicit(object, order) __c11_atomic_store(&(object)->_Value, 0, order)\n" |
| 46518 | "\n" |
| 46519 | "#ifdef __cplusplus\n" |
| 46520 | "}\n" |
| 46521 | "#endif\n" |
| 46522 | "\n" |
| 46523 | "#endif /* __STDC_HOSTED__ */\n" |
| 46524 | "#endif /* __CLANG_STDATOMIC_H */\n" |
| 46525 | "\n" |
| 46526 | "" } , |
| 46527 | { "/builtins/stdbool.h" , "/*===---- stdbool.h - Standard header for booleans -------------------------===\n" |
| 46528 | " *\n" |
| 46529 | " * Copyright (c) 2008 Eli Friedman\n" |
| 46530 | " *\n" |
| 46531 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 46532 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 46533 | " * in the Software without restriction, including without limitation the rights\n" |
| 46534 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 46535 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 46536 | " * furnished to do so, subject to the following conditions:\n" |
| 46537 | " *\n" |
| 46538 | " * The above copyright notice and this permission notice shall be included in\n" |
| 46539 | " * all copies or substantial portions of the Software.\n" |
| 46540 | " *\n" |
| 46541 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 46542 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 46543 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 46544 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 46545 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 46546 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 46547 | " * THE SOFTWARE.\n" |
| 46548 | " *\n" |
| 46549 | " *===-----------------------------------------------------------------------===\n" |
| 46550 | " */\n" |
| 46551 | "\n" |
| 46552 | "#ifndef __STDBOOL_H\n" |
| 46553 | "#define __STDBOOL_H\n" |
| 46554 | "\n" |
| 46555 | "/* Don't define bool, true, and false in C++, except as a GNU extension. */\n" |
| 46556 | "#ifndef __cplusplus\n" |
| 46557 | "#define bool _Bool\n" |
| 46558 | "#define true 1\n" |
| 46559 | "#define false 0\n" |
| 46560 | "#elif defined(__GNUC__) && !defined(__STRICT_ANSI__)\n" |
| 46561 | "/* Define _Bool as a GNU extension. */\n" |
| 46562 | "#define _Bool bool\n" |
| 46563 | "#if __cplusplus < 201103L\n" |
| 46564 | "/* For C++98, define bool, false, true as a GNU extension. */\n" |
| 46565 | "#define bool bool\n" |
| 46566 | "#define false false\n" |
| 46567 | "#define true true\n" |
| 46568 | "#endif\n" |
| 46569 | "#endif\n" |
| 46570 | "\n" |
| 46571 | "#define __bool_true_false_are_defined 1\n" |
| 46572 | "\n" |
| 46573 | "#endif /* __STDBOOL_H */\n" |
| 46574 | "" } , |
| 46575 | { "/builtins/stddef.h" , "/*===---- stddef.h - Basic type definitions --------------------------------===\n" |
| 46576 | " *\n" |
| 46577 | " * Copyright (c) 2008 Eli Friedman\n" |
| 46578 | " *\n" |
| 46579 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 46580 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 46581 | " * in the Software without restriction, including without limitation the rights\n" |
| 46582 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 46583 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 46584 | " * furnished to do so, subject to the following conditions:\n" |
| 46585 | " *\n" |
| 46586 | " * The above copyright notice and this permission notice shall be included in\n" |
| 46587 | " * all copies or substantial portions of the Software.\n" |
| 46588 | " *\n" |
| 46589 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 46590 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 46591 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 46592 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 46593 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 46594 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 46595 | " * THE SOFTWARE.\n" |
| 46596 | " *\n" |
| 46597 | " *===-----------------------------------------------------------------------===\n" |
| 46598 | " */\n" |
| 46599 | "\n" |
| 46600 | "#if !defined(__STDDEF_H) || defined(__need_ptrdiff_t) || \\\n" |
| 46601 | " defined(__need_size_t) || defined(__need_wchar_t) || \\\n" |
| 46602 | " defined(__need_NULL) || defined(__need_wint_t)\n" |
| 46603 | "\n" |
| 46604 | "#if !defined(__need_ptrdiff_t) && !defined(__need_size_t) && \\\n" |
| 46605 | " !defined(__need_wchar_t) && !defined(__need_NULL) && \\\n" |
| 46606 | " !defined(__need_wint_t)\n" |
| 46607 | "/* Always define miscellaneous pieces when modules are available. */\n" |
| 46608 | "#if !__has_feature(modules)\n" |
| 46609 | "#define __STDDEF_H\n" |
| 46610 | "#endif\n" |
| 46611 | "#define __need_ptrdiff_t\n" |
| 46612 | "#define __need_size_t\n" |
| 46613 | "#define __need_wchar_t\n" |
| 46614 | "#define __need_NULL\n" |
| 46615 | "#define __need_STDDEF_H_misc\n" |
| 46616 | "/* __need_wint_t is intentionally not defined here. */\n" |
| 46617 | "#endif\n" |
| 46618 | "\n" |
| 46619 | "#if defined(__need_ptrdiff_t)\n" |
| 46620 | "#if !defined(_PTRDIFF_T) || __has_feature(modules)\n" |
| 46621 | "/* Always define ptrdiff_t when modules are available. */\n" |
| 46622 | "#if !__has_feature(modules)\n" |
| 46623 | "#define _PTRDIFF_T\n" |
| 46624 | "#endif\n" |
| 46625 | "typedef __PTRDIFF_TYPE__ ptrdiff_t;\n" |
| 46626 | "#endif\n" |
| 46627 | "#undef __need_ptrdiff_t\n" |
| 46628 | "#endif /* defined(__need_ptrdiff_t) */\n" |
| 46629 | "\n" |
| 46630 | "#if defined(__need_size_t)\n" |
| 46631 | "#if !defined(_SIZE_T) || __has_feature(modules)\n" |
| 46632 | "/* Always define size_t when modules are available. */\n" |
| 46633 | "#if !__has_feature(modules)\n" |
| 46634 | "#define _SIZE_T\n" |
| 46635 | "#endif\n" |
| 46636 | "typedef __SIZE_TYPE__ size_t;\n" |
| 46637 | "#endif\n" |
| 46638 | "#undef __need_size_t\n" |
| 46639 | "#endif /*defined(__need_size_t) */\n" |
| 46640 | "\n" |
| 46641 | "#if defined(__need_STDDEF_H_misc)\n" |
| 46642 | "/* ISO9899:2011 7.20 (C11 Annex K): Define rsize_t if __STDC_WANT_LIB_EXT1__ is\n" |
| 46643 | " * enabled. */\n" |
| 46644 | "#if (defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1 && \\\n" |
| 46645 | " !defined(_RSIZE_T)) || __has_feature(modules)\n" |
| 46646 | "/* Always define rsize_t when modules are available. */\n" |
| 46647 | "#if !__has_feature(modules)\n" |
| 46648 | "#define _RSIZE_T\n" |
| 46649 | "#endif\n" |
| 46650 | "typedef __SIZE_TYPE__ rsize_t;\n" |
| 46651 | "#endif\n" |
| 46652 | "#endif /* defined(__need_STDDEF_H_misc) */\n" |
| 46653 | "\n" |
| 46654 | "#if defined(__need_wchar_t)\n" |
| 46655 | "#ifndef __cplusplus\n" |
| 46656 | "/* Always define wchar_t when modules are available. */\n" |
| 46657 | "#if !defined(_WCHAR_T) || __has_feature(modules)\n" |
| 46658 | "#if !__has_feature(modules)\n" |
| 46659 | "#define _WCHAR_T\n" |
| 46660 | "#if defined(_MSC_EXTENSIONS)\n" |
| 46661 | "#define _WCHAR_T_DEFINED\n" |
| 46662 | "#endif\n" |
| 46663 | "#endif\n" |
| 46664 | "typedef __WCHAR_TYPE__ wchar_t;\n" |
| 46665 | "#endif\n" |
| 46666 | "#endif\n" |
| 46667 | "#undef __need_wchar_t\n" |
| 46668 | "#endif /* defined(__need_wchar_t) */\n" |
| 46669 | "\n" |
| 46670 | "#if defined(__need_NULL)\n" |
| 46671 | "#undef NULL\n" |
| 46672 | "#ifdef __cplusplus\n" |
| 46673 | "# if !defined(__MINGW32__) && !defined(_MSC_VER)\n" |
| 46674 | "# define NULL __null\n" |
| 46675 | "# else\n" |
| 46676 | "# define NULL 0\n" |
| 46677 | "# endif\n" |
| 46678 | "#else\n" |
| 46679 | "# define NULL ((void*)0)\n" |
| 46680 | "#endif\n" |
| 46681 | "#ifdef __cplusplus\n" |
| 46682 | "#if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED)\n" |
| 46683 | "namespace std { typedef decltype(nullptr) nullptr_t; }\n" |
| 46684 | "using ::std::nullptr_t;\n" |
| 46685 | "#endif\n" |
| 46686 | "#endif\n" |
| 46687 | "#undef __need_NULL\n" |
| 46688 | "#endif /* defined(__need_NULL) */\n" |
| 46689 | "\n" |
| 46690 | "#if defined(__need_STDDEF_H_misc)\n" |
| 46691 | "#if __STDC_VERSION__ >= 201112L || __cplusplus >= 201103L\n" |
| 46692 | "#include \"__stddef_max_align_t.h\"\n" |
| 46693 | "#endif\n" |
| 46694 | "#define offsetof(t, d) __builtin_offsetof(t, d)\n" |
| 46695 | "#undef __need_STDDEF_H_misc\n" |
| 46696 | "#endif /* defined(__need_STDDEF_H_misc) */\n" |
| 46697 | "\n" |
| 46698 | "/* Some C libraries expect to see a wint_t here. Others (notably MinGW) will use\n" |
| 46699 | "__WINT_TYPE__ directly; accommodate both by requiring __need_wint_t */\n" |
| 46700 | "#if defined(__need_wint_t)\n" |
| 46701 | "/* Always define wint_t when modules are available. */\n" |
| 46702 | "#if !defined(_WINT_T) || __has_feature(modules)\n" |
| 46703 | "#if !__has_feature(modules)\n" |
| 46704 | "#define _WINT_T\n" |
| 46705 | "#endif\n" |
| 46706 | "typedef __WINT_TYPE__ wint_t;\n" |
| 46707 | "#endif\n" |
| 46708 | "#undef __need_wint_t\n" |
| 46709 | "#endif /* __need_wint_t */\n" |
| 46710 | "\n" |
| 46711 | "#endif\n" |
| 46712 | "" } , |
| 46713 | { "/builtins/stdint.h" , "/*===---- stdint.h - Standard header for sized integer types --------------===*\\\n" |
| 46714 | " *\n" |
| 46715 | " * Copyright (c) 2009 Chris Lattner\n" |
| 46716 | " *\n" |
| 46717 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 46718 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 46719 | " * in the Software without restriction, including without limitation the rights\n" |
| 46720 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 46721 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 46722 | " * furnished to do so, subject to the following conditions:\n" |
| 46723 | " *\n" |
| 46724 | " * The above copyright notice and this permission notice shall be included in\n" |
| 46725 | " * all copies or substantial portions of the Software.\n" |
| 46726 | " *\n" |
| 46727 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 46728 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 46729 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 46730 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 46731 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 46732 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 46733 | " * THE SOFTWARE.\n" |
| 46734 | " *\n" |
| 46735 | "\\*===----------------------------------------------------------------------===*/\n" |
| 46736 | "\n" |
| 46737 | "\n" |
| 46738 | "/* If we're hosted, fall back to the system's stdint.h, which might have\n" |
| 46739 | " * additional definitions.\n" |
| 46740 | " */\n" |
| 46741 | "#if __STDC_HOSTED__ && __has_include_next(<stdint.h>)\n" |
| 46742 | "\n" |
| 46743 | "// C99 7.18.3 Limits of other integer types\n" |
| 46744 | "//\n" |
| 46745 | "// Footnote 219, 220: C++ implementations should define these macros only when\n" |
| 46746 | "// __STDC_LIMIT_MACROS is defined before <stdint.h> is included.\n" |
| 46747 | "//\n" |
| 46748 | "// Footnote 222: C++ implementations should define these macros only when\n" |
| 46749 | "// __STDC_CONSTANT_MACROS is defined before <stdint.h> is included.\n" |
| 46750 | "//\n" |
| 46751 | "// C++11 [cstdint.syn]p2:\n" |
| 46752 | "//\n" |
| 46753 | "// The macros defined by <cstdint> are provided unconditionally. In particular,\n" |
| 46754 | "// the symbols __STDC_LIMIT_MACROS and __STDC_CONSTANT_MACROS (mentioned in\n" |
| 46755 | "// footnotes 219, 220, and 222 in the C standard) play no role in C++.\n" |
| 46756 | "//\n" |
| 46757 | "// C11 removed the problematic footnotes.\n" |
| 46758 | "//\n" |
| 46759 | "// Work around this inconsistency by always defining those macros in C++ mode,\n" |
| 46760 | "// so that a C library implementation which follows the C99 standard can be\n" |
| 46761 | "// used in C++.\n" |
| 46762 | "# ifdef __cplusplus\n" |
| 46763 | "# if !defined(__STDC_LIMIT_MACROS)\n" |
| 46764 | "# define __STDC_LIMIT_MACROS\n" |
| 46765 | "# define __STDC_LIMIT_MACROS_DEFINED_BY_CLANG\n" |
| 46766 | "# endif\n" |
| 46767 | "# if !defined(__STDC_CONSTANT_MACROS)\n" |
| 46768 | "# define __STDC_CONSTANT_MACROS\n" |
| 46769 | "# define __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG\n" |
| 46770 | "# endif\n" |
| 46771 | "# endif\n" |
| 46772 | "\n" |
| 46773 | "# include_next <stdint.h>\n" |
| 46774 | "\n" |
| 46775 | "# ifdef __STDC_LIMIT_MACROS_DEFINED_BY_CLANG\n" |
| 46776 | "# undef __STDC_LIMIT_MACROS\n" |
| 46777 | "# undef __STDC_LIMIT_MACROS_DEFINED_BY_CLANG\n" |
| 46778 | "# endif\n" |
| 46779 | "# ifdef __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG\n" |
| 46780 | "# undef __STDC_CONSTANT_MACROS\n" |
| 46781 | "# undef __STDC_CONSTANT_MACROS_DEFINED_BY_CLANG\n" |
| 46782 | "# endif\n" |
| 46783 | "\n" |
| 46784 | "#else\n" |
| 46785 | "#ifndef __CLANG_STDINT_H2\n" |
| 46786 | "#define __CLANG_STDINT_H2\n" |
| 46787 | "\n" |
| 46788 | "/* C99 7.18.1.1 Exact-width integer types.\n" |
| 46789 | " * C99 7.18.1.2 Minimum-width integer types.\n" |
| 46790 | " * C99 7.18.1.3 Fastest minimum-width integer types.\n" |
| 46791 | " *\n" |
| 46792 | " * The standard requires that exact-width type be defined for 8-, 16-, 32-, and\n" |
| 46793 | " * 64-bit types if they are implemented. Other exact width types are optional.\n" |
| 46794 | " * This implementation defines an exact-width types for every integer width\n" |
| 46795 | " * that is represented in the standard integer types.\n" |
| 46796 | " *\n" |
| 46797 | " * The standard also requires minimum-width types be defined for 8-, 16-, 32-,\n" |
| 46798 | " * and 64-bit widths regardless of whether there are corresponding exact-width\n" |
| 46799 | " * types.\n" |
| 46800 | " *\n" |
| 46801 | " * To accommodate targets that are missing types that are exactly 8, 16, 32, or\n" |
| 46802 | " * 64 bits wide, this implementation takes an approach of cascading\n" |
| 46803 | " * redefinitions, redefining __int_leastN_t to successively smaller exact-width\n" |
| 46804 | " * types. It is therefore important that the types are defined in order of\n" |
| 46805 | " * descending widths.\n" |
| 46806 | " *\n" |
| 46807 | " * We currently assume that the minimum-width types and the fastest\n" |
| 46808 | " * minimum-width types are the same. This is allowed by the standard, but is\n" |
| 46809 | " * suboptimal.\n" |
| 46810 | " *\n" |
| 46811 | " * In violation of the standard, some targets do not implement a type that is\n" |
| 46812 | " * wide enough to represent all of the required widths (8-, 16-, 32-, 64-bit).\n" |
| 46813 | " * To accommodate these targets, a required minimum-width type is only\n" |
| 46814 | " * defined if there exists an exact-width type of equal or greater width.\n" |
| 46815 | " */\n" |
| 46816 | "\n" |
| 46817 | "#ifdef __INT64_TYPE__\n" |
| 46818 | "# ifndef __int8_t_defined /* glibc sys/types.h also defines int64_t*/\n" |
| 46819 | "typedef __INT64_TYPE__ int64_t;\n" |
| 46820 | "# endif /* __int8_t_defined */\n" |
| 46821 | "typedef __UINT64_TYPE__ uint64_t;\n" |
| 46822 | "# define __int_least64_t int64_t\n" |
| 46823 | "# define __uint_least64_t uint64_t\n" |
| 46824 | "# define __int_least32_t int64_t\n" |
| 46825 | "# define __uint_least32_t uint64_t\n" |
| 46826 | "# define __int_least16_t int64_t\n" |
| 46827 | "# define __uint_least16_t uint64_t\n" |
| 46828 | "# define __int_least8_t int64_t\n" |
| 46829 | "# define __uint_least8_t uint64_t\n" |
| 46830 | "#endif /* __INT64_TYPE__ */\n" |
| 46831 | "\n" |
| 46832 | "#ifdef __int_least64_t\n" |
| 46833 | "typedef __int_least64_t int_least64_t;\n" |
| 46834 | "typedef __uint_least64_t uint_least64_t;\n" |
| 46835 | "typedef __int_least64_t int_fast64_t;\n" |
| 46836 | "typedef __uint_least64_t uint_fast64_t;\n" |
| 46837 | "#endif /* __int_least64_t */\n" |
| 46838 | "\n" |
| 46839 | "#ifdef __INT56_TYPE__\n" |
| 46840 | "typedef __INT56_TYPE__ int56_t;\n" |
| 46841 | "typedef __UINT56_TYPE__ uint56_t;\n" |
| 46842 | "typedef int56_t int_least56_t;\n" |
| 46843 | "typedef uint56_t uint_least56_t;\n" |
| 46844 | "typedef int56_t int_fast56_t;\n" |
| 46845 | "typedef uint56_t uint_fast56_t;\n" |
| 46846 | "# define __int_least32_t int56_t\n" |
| 46847 | "# define __uint_least32_t uint56_t\n" |
| 46848 | "# define __int_least16_t int56_t\n" |
| 46849 | "# define __uint_least16_t uint56_t\n" |
| 46850 | "# define __int_least8_t int56_t\n" |
| 46851 | "# define __uint_least8_t uint56_t\n" |
| 46852 | "#endif /* __INT56_TYPE__ */\n" |
| 46853 | "\n" |
| 46854 | "\n" |
| 46855 | "#ifdef __INT48_TYPE__\n" |
| 46856 | "typedef __INT48_TYPE__ int48_t;\n" |
| 46857 | "typedef __UINT48_TYPE__ uint48_t;\n" |
| 46858 | "typedef int48_t int_least48_t;\n" |
| 46859 | "typedef uint48_t uint_least48_t;\n" |
| 46860 | "typedef int48_t int_fast48_t;\n" |
| 46861 | "typedef uint48_t uint_fast48_t;\n" |
| 46862 | "# define __int_least32_t int48_t\n" |
| 46863 | "# define __uint_least32_t uint48_t\n" |
| 46864 | "# define __int_least16_t int48_t\n" |
| 46865 | "# define __uint_least16_t uint48_t\n" |
| 46866 | "# define __int_least8_t int48_t\n" |
| 46867 | "# define __uint_least8_t uint48_t\n" |
| 46868 | "#endif /* __INT48_TYPE__ */\n" |
| 46869 | "\n" |
| 46870 | "\n" |
| 46871 | "#ifdef __INT40_TYPE__\n" |
| 46872 | "typedef __INT40_TYPE__ int40_t;\n" |
| 46873 | "typedef __UINT40_TYPE__ uint40_t;\n" |
| 46874 | "typedef int40_t int_least40_t;\n" |
| 46875 | "typedef uint40_t uint_least40_t;\n" |
| 46876 | "typedef int40_t int_fast40_t;\n" |
| 46877 | "typedef uint40_t uint_fast40_t;\n" |
| 46878 | "# define __int_least32_t int40_t\n" |
| 46879 | "# define __uint_least32_t uint40_t\n" |
| 46880 | "# define __int_least16_t int40_t\n" |
| 46881 | "# define __uint_least16_t uint40_t\n" |
| 46882 | "# define __int_least8_t int40_t\n" |
| 46883 | "# define __uint_least8_t uint40_t\n" |
| 46884 | "#endif /* __INT40_TYPE__ */\n" |
| 46885 | "\n" |
| 46886 | "\n" |
| 46887 | "#ifdef __INT32_TYPE__\n" |
| 46888 | "\n" |
| 46889 | "# ifndef __int8_t_defined /* glibc sys/types.h also defines int32_t*/\n" |
| 46890 | "typedef __INT32_TYPE__ int32_t;\n" |
| 46891 | "# endif /* __int8_t_defined */\n" |
| 46892 | "\n" |
| 46893 | "# ifndef __uint32_t_defined /* more glibc compatibility */\n" |
| 46894 | "# define __uint32_t_defined\n" |
| 46895 | "typedef __UINT32_TYPE__ uint32_t;\n" |
| 46896 | "# endif /* __uint32_t_defined */\n" |
| 46897 | "\n" |
| 46898 | "# define __int_least32_t int32_t\n" |
| 46899 | "# define __uint_least32_t uint32_t\n" |
| 46900 | "# define __int_least16_t int32_t\n" |
| 46901 | "# define __uint_least16_t uint32_t\n" |
| 46902 | "# define __int_least8_t int32_t\n" |
| 46903 | "# define __uint_least8_t uint32_t\n" |
| 46904 | "#endif /* __INT32_TYPE__ */\n" |
| 46905 | "\n" |
| 46906 | "#ifdef __int_least32_t\n" |
| 46907 | "typedef __int_least32_t int_least32_t;\n" |
| 46908 | "typedef __uint_least32_t uint_least32_t;\n" |
| 46909 | "typedef __int_least32_t int_fast32_t;\n" |
| 46910 | "typedef __uint_least32_t uint_fast32_t;\n" |
| 46911 | "#endif /* __int_least32_t */\n" |
| 46912 | "\n" |
| 46913 | "#ifdef __INT24_TYPE__\n" |
| 46914 | "typedef __INT24_TYPE__ int24_t;\n" |
| 46915 | "typedef __UINT24_TYPE__ uint24_t;\n" |
| 46916 | "typedef int24_t int_least24_t;\n" |
| 46917 | "typedef uint24_t uint_least24_t;\n" |
| 46918 | "typedef int24_t int_fast24_t;\n" |
| 46919 | "typedef uint24_t uint_fast24_t;\n" |
| 46920 | "# define __int_least16_t int24_t\n" |
| 46921 | "# define __uint_least16_t uint24_t\n" |
| 46922 | "# define __int_least8_t int24_t\n" |
| 46923 | "# define __uint_least8_t uint24_t\n" |
| 46924 | "#endif /* __INT24_TYPE__ */\n" |
| 46925 | "\n" |
| 46926 | "#ifdef __INT16_TYPE__\n" |
| 46927 | "#ifndef __int8_t_defined /* glibc sys/types.h also defines int16_t*/\n" |
| 46928 | "typedef __INT16_TYPE__ int16_t;\n" |
| 46929 | "#endif /* __int8_t_defined */\n" |
| 46930 | "typedef __UINT16_TYPE__ uint16_t;\n" |
| 46931 | "# define __int_least16_t int16_t\n" |
| 46932 | "# define __uint_least16_t uint16_t\n" |
| 46933 | "# define __int_least8_t int16_t\n" |
| 46934 | "# define __uint_least8_t uint16_t\n" |
| 46935 | "#endif /* __INT16_TYPE__ */\n" |
| 46936 | "\n" |
| 46937 | "#ifdef __int_least16_t\n" |
| 46938 | "typedef __int_least16_t int_least16_t;\n" |
| 46939 | "typedef __uint_least16_t uint_least16_t;\n" |
| 46940 | "typedef __int_least16_t int_fast16_t;\n" |
| 46941 | "typedef __uint_least16_t uint_fast16_t;\n" |
| 46942 | "#endif /* __int_least16_t */\n" |
| 46943 | "\n" |
| 46944 | "\n" |
| 46945 | "#ifdef __INT8_TYPE__\n" |
| 46946 | "#ifndef __int8_t_defined /* glibc sys/types.h also defines int8_t*/\n" |
| 46947 | "typedef __INT8_TYPE__ int8_t;\n" |
| 46948 | "#endif /* __int8_t_defined */\n" |
| 46949 | "typedef __UINT8_TYPE__ uint8_t;\n" |
| 46950 | "# define __int_least8_t int8_t\n" |
| 46951 | "# define __uint_least8_t uint8_t\n" |
| 46952 | "#endif /* __INT8_TYPE__ */\n" |
| 46953 | "\n" |
| 46954 | "#ifdef __int_least8_t\n" |
| 46955 | "typedef __int_least8_t int_least8_t;\n" |
| 46956 | "typedef __uint_least8_t uint_least8_t;\n" |
| 46957 | "typedef __int_least8_t int_fast8_t;\n" |
| 46958 | "typedef __uint_least8_t uint_fast8_t;\n" |
| 46959 | "#endif /* __int_least8_t */\n" |
| 46960 | "\n" |
| 46961 | "/* prevent glibc sys/types.h from defining conflicting types */\n" |
| 46962 | "#ifndef __int8_t_defined\n" |
| 46963 | "# define __int8_t_defined\n" |
| 46964 | "#endif /* __int8_t_defined */\n" |
| 46965 | "\n" |
| 46966 | "/* C99 7.18.1.4 Integer types capable of holding object pointers.\n" |
| 46967 | " */\n" |
| 46968 | "#define __stdint_join3(a,b,c) a ## b ## c\n" |
| 46969 | "\n" |
| 46970 | "#ifndef _INTPTR_T\n" |
| 46971 | "#ifndef __intptr_t_defined\n" |
| 46972 | "typedef __INTPTR_TYPE__ intptr_t;\n" |
| 46973 | "#define __intptr_t_defined\n" |
| 46974 | "#define _INTPTR_T\n" |
| 46975 | "#endif\n" |
| 46976 | "#endif\n" |
| 46977 | "\n" |
| 46978 | "#ifndef _UINTPTR_T\n" |
| 46979 | "typedef __UINTPTR_TYPE__ uintptr_t;\n" |
| 46980 | "#define _UINTPTR_T\n" |
| 46981 | "#endif\n" |
| 46982 | "\n" |
| 46983 | "/* C99 7.18.1.5 Greatest-width integer types.\n" |
| 46984 | " */\n" |
| 46985 | "typedef __INTMAX_TYPE__ intmax_t;\n" |
| 46986 | "typedef __UINTMAX_TYPE__ uintmax_t;\n" |
| 46987 | "\n" |
| 46988 | "/* C99 7.18.4 Macros for minimum-width integer constants.\n" |
| 46989 | " *\n" |
| 46990 | " * The standard requires that integer constant macros be defined for all the\n" |
| 46991 | " * minimum-width types defined above. As 8-, 16-, 32-, and 64-bit minimum-width\n" |
| 46992 | " * types are required, the corresponding integer constant macros are defined\n" |
| 46993 | " * here. This implementation also defines minimum-width types for every other\n" |
| 46994 | " * integer width that the target implements, so corresponding macros are\n" |
| 46995 | " * defined below, too.\n" |
| 46996 | " *\n" |
| 46997 | " * These macros are defined using the same successive-shrinking approach as\n" |
| 46998 | " * the type definitions above. It is likewise important that macros are defined\n" |
| 46999 | " * in order of decending width.\n" |
| 47000 | " *\n" |
| 47001 | " * Note that C++ should not check __STDC_CONSTANT_MACROS here, contrary to the\n" |
| 47002 | " * claims of the C standard (see C++ 18.3.1p2, [cstdint.syn]).\n" |
| 47003 | " */\n" |
| 47004 | "\n" |
| 47005 | "#define __int_c_join(a, b) a ## b\n" |
| 47006 | "#define __int_c(v, suffix) __int_c_join(v, suffix)\n" |
| 47007 | "#define __uint_c(v, suffix) __int_c_join(v##U, suffix)\n" |
| 47008 | "\n" |
| 47009 | "\n" |
| 47010 | "#ifdef __INT64_TYPE__\n" |
| 47011 | "# ifdef __INT64_C_SUFFIX__\n" |
| 47012 | "# define __int64_c_suffix __INT64_C_SUFFIX__\n" |
| 47013 | "# define __int32_c_suffix __INT64_C_SUFFIX__\n" |
| 47014 | "# define __int16_c_suffix __INT64_C_SUFFIX__\n" |
| 47015 | "# define __int8_c_suffix __INT64_C_SUFFIX__\n" |
| 47016 | "# else\n" |
| 47017 | "# undef __int64_c_suffix\n" |
| 47018 | "# undef __int32_c_suffix\n" |
| 47019 | "# undef __int16_c_suffix\n" |
| 47020 | "# undef __int8_c_suffix\n" |
| 47021 | "# endif /* __INT64_C_SUFFIX__ */\n" |
| 47022 | "#endif /* __INT64_TYPE__ */\n" |
| 47023 | "\n" |
| 47024 | "#ifdef __int_least64_t\n" |
| 47025 | "# ifdef __int64_c_suffix\n" |
| 47026 | "# define INT64_C(v) __int_c(v, __int64_c_suffix)\n" |
| 47027 | "# define UINT64_C(v) __uint_c(v, __int64_c_suffix)\n" |
| 47028 | "# else\n" |
| 47029 | "# define INT64_C(v) v\n" |
| 47030 | "# define UINT64_C(v) v ## U\n" |
| 47031 | "# endif /* __int64_c_suffix */\n" |
| 47032 | "#endif /* __int_least64_t */\n" |
| 47033 | "\n" |
| 47034 | "\n" |
| 47035 | "#ifdef __INT56_TYPE__\n" |
| 47036 | "# ifdef __INT56_C_SUFFIX__\n" |
| 47037 | "# define INT56_C(v) __int_c(v, __INT56_C_SUFFIX__)\n" |
| 47038 | "# define UINT56_C(v) __uint_c(v, __INT56_C_SUFFIX__)\n" |
| 47039 | "# define __int32_c_suffix __INT56_C_SUFFIX__\n" |
| 47040 | "# define __int16_c_suffix __INT56_C_SUFFIX__\n" |
| 47041 | "# define __int8_c_suffix __INT56_C_SUFFIX__\n" |
| 47042 | "# else\n" |
| 47043 | "# define INT56_C(v) v\n" |
| 47044 | "# define UINT56_C(v) v ## U\n" |
| 47045 | "# undef __int32_c_suffix\n" |
| 47046 | "# undef __int16_c_suffix\n" |
| 47047 | "# undef __int8_c_suffix\n" |
| 47048 | "# endif /* __INT56_C_SUFFIX__ */\n" |
| 47049 | "#endif /* __INT56_TYPE__ */\n" |
| 47050 | "\n" |
| 47051 | "\n" |
| 47052 | "#ifdef __INT48_TYPE__\n" |
| 47053 | "# ifdef __INT48_C_SUFFIX__\n" |
| 47054 | "# define INT48_C(v) __int_c(v, __INT48_C_SUFFIX__)\n" |
| 47055 | "# define UINT48_C(v) __uint_c(v, __INT48_C_SUFFIX__)\n" |
| 47056 | "# define __int32_c_suffix __INT48_C_SUFFIX__\n" |
| 47057 | "# define __int16_c_suffix __INT48_C_SUFFIX__\n" |
| 47058 | "# define __int8_c_suffix __INT48_C_SUFFIX__\n" |
| 47059 | "# else\n" |
| 47060 | "# define INT48_C(v) v\n" |
| 47061 | "# define UINT48_C(v) v ## U\n" |
| 47062 | "# undef __int32_c_suffix\n" |
| 47063 | "# undef __int16_c_suffix\n" |
| 47064 | "# undef __int8_c_suffix\n" |
| 47065 | "# endif /* __INT48_C_SUFFIX__ */\n" |
| 47066 | "#endif /* __INT48_TYPE__ */\n" |
| 47067 | "\n" |
| 47068 | "\n" |
| 47069 | "#ifdef __INT40_TYPE__\n" |
| 47070 | "# ifdef __INT40_C_SUFFIX__\n" |
| 47071 | "# define INT40_C(v) __int_c(v, __INT40_C_SUFFIX__)\n" |
| 47072 | "# define UINT40_C(v) __uint_c(v, __INT40_C_SUFFIX__)\n" |
| 47073 | "# define __int32_c_suffix __INT40_C_SUFFIX__\n" |
| 47074 | "# define __int16_c_suffix __INT40_C_SUFFIX__\n" |
| 47075 | "# define __int8_c_suffix __INT40_C_SUFFIX__\n" |
| 47076 | "# else\n" |
| 47077 | "# define INT40_C(v) v\n" |
| 47078 | "# define UINT40_C(v) v ## U\n" |
| 47079 | "# undef __int32_c_suffix\n" |
| 47080 | "# undef __int16_c_suffix\n" |
| 47081 | "# undef __int8_c_suffix\n" |
| 47082 | "# endif /* __INT40_C_SUFFIX__ */\n" |
| 47083 | "#endif /* __INT40_TYPE__ */\n" |
| 47084 | "\n" |
| 47085 | "\n" |
| 47086 | "#ifdef __INT32_TYPE__\n" |
| 47087 | "# ifdef __INT32_C_SUFFIX__\n" |
| 47088 | "# define __int32_c_suffix __INT32_C_SUFFIX__\n" |
| 47089 | "# define __int16_c_suffix __INT32_C_SUFFIX__\n" |
| 47090 | "# define __int8_c_suffix __INT32_C_SUFFIX__\n" |
| 47091 | "#else\n" |
| 47092 | "# undef __int32_c_suffix\n" |
| 47093 | "# undef __int16_c_suffix\n" |
| 47094 | "# undef __int8_c_suffix\n" |
| 47095 | "# endif /* __INT32_C_SUFFIX__ */\n" |
| 47096 | "#endif /* __INT32_TYPE__ */\n" |
| 47097 | "\n" |
| 47098 | "#ifdef __int_least32_t\n" |
| 47099 | "# ifdef __int32_c_suffix\n" |
| 47100 | "# define INT32_C(v) __int_c(v, __int32_c_suffix)\n" |
| 47101 | "# define UINT32_C(v) __uint_c(v, __int32_c_suffix)\n" |
| 47102 | "# else\n" |
| 47103 | "# define INT32_C(v) v\n" |
| 47104 | "# define UINT32_C(v) v ## U\n" |
| 47105 | "# endif /* __int32_c_suffix */\n" |
| 47106 | "#endif /* __int_least32_t */\n" |
| 47107 | "\n" |
| 47108 | "\n" |
| 47109 | "#ifdef __INT24_TYPE__\n" |
| 47110 | "# ifdef __INT24_C_SUFFIX__\n" |
| 47111 | "# define INT24_C(v) __int_c(v, __INT24_C_SUFFIX__)\n" |
| 47112 | "# define UINT24_C(v) __uint_c(v, __INT24_C_SUFFIX__)\n" |
| 47113 | "# define __int16_c_suffix __INT24_C_SUFFIX__\n" |
| 47114 | "# define __int8_c_suffix __INT24_C_SUFFIX__\n" |
| 47115 | "# else\n" |
| 47116 | "# define INT24_C(v) v\n" |
| 47117 | "# define UINT24_C(v) v ## U\n" |
| 47118 | "# undef __int16_c_suffix\n" |
| 47119 | "# undef __int8_c_suffix\n" |
| 47120 | "# endif /* __INT24_C_SUFFIX__ */\n" |
| 47121 | "#endif /* __INT24_TYPE__ */\n" |
| 47122 | "\n" |
| 47123 | "\n" |
| 47124 | "#ifdef __INT16_TYPE__\n" |
| 47125 | "# ifdef __INT16_C_SUFFIX__\n" |
| 47126 | "# define __int16_c_suffix __INT16_C_SUFFIX__\n" |
| 47127 | "# define __int8_c_suffix __INT16_C_SUFFIX__\n" |
| 47128 | "#else\n" |
| 47129 | "# undef __int16_c_suffix\n" |
| 47130 | "# undef __int8_c_suffix\n" |
| 47131 | "# endif /* __INT16_C_SUFFIX__ */\n" |
| 47132 | "#endif /* __INT16_TYPE__ */\n" |
| 47133 | "\n" |
| 47134 | "#ifdef __int_least16_t\n" |
| 47135 | "# ifdef __int16_c_suffix\n" |
| 47136 | "# define INT16_C(v) __int_c(v, __int16_c_suffix)\n" |
| 47137 | "# define UINT16_C(v) __uint_c(v, __int16_c_suffix)\n" |
| 47138 | "# else\n" |
| 47139 | "# define INT16_C(v) v\n" |
| 47140 | "# define UINT16_C(v) v ## U\n" |
| 47141 | "# endif /* __int16_c_suffix */\n" |
| 47142 | "#endif /* __int_least16_t */\n" |
| 47143 | "\n" |
| 47144 | "\n" |
| 47145 | "#ifdef __INT8_TYPE__\n" |
| 47146 | "# ifdef __INT8_C_SUFFIX__\n" |
| 47147 | "# define __int8_c_suffix __INT8_C_SUFFIX__\n" |
| 47148 | "#else\n" |
| 47149 | "# undef __int8_c_suffix\n" |
| 47150 | "# endif /* __INT8_C_SUFFIX__ */\n" |
| 47151 | "#endif /* __INT8_TYPE__ */\n" |
| 47152 | "\n" |
| 47153 | "#ifdef __int_least8_t\n" |
| 47154 | "# ifdef __int8_c_suffix\n" |
| 47155 | "# define INT8_C(v) __int_c(v, __int8_c_suffix)\n" |
| 47156 | "# define UINT8_C(v) __uint_c(v, __int8_c_suffix)\n" |
| 47157 | "# else\n" |
| 47158 | "# define INT8_C(v) v\n" |
| 47159 | "# define UINT8_C(v) v ## U\n" |
| 47160 | "# endif /* __int8_c_suffix */\n" |
| 47161 | "#endif /* __int_least8_t */\n" |
| 47162 | "\n" |
| 47163 | "\n" |
| 47164 | "/* C99 7.18.2.1 Limits of exact-width integer types.\n" |
| 47165 | " * C99 7.18.2.2 Limits of minimum-width integer types.\n" |
| 47166 | " * C99 7.18.2.3 Limits of fastest minimum-width integer types.\n" |
| 47167 | " *\n" |
| 47168 | " * The presence of limit macros are completely optional in C99. This\n" |
| 47169 | " * implementation defines limits for all of the types (exact- and\n" |
| 47170 | " * minimum-width) that it defines above, using the limits of the minimum-width\n" |
| 47171 | " * type for any types that do not have exact-width representations.\n" |
| 47172 | " *\n" |
| 47173 | " * As in the type definitions, this section takes an approach of\n" |
| 47174 | " * successive-shrinking to determine which limits to use for the standard (8,\n" |
| 47175 | " * 16, 32, 64) bit widths when they don't have exact representations. It is\n" |
| 47176 | " * therefore important that the definitions be kept in order of decending\n" |
| 47177 | " * widths.\n" |
| 47178 | " *\n" |
| 47179 | " * Note that C++ should not check __STDC_LIMIT_MACROS here, contrary to the\n" |
| 47180 | " * claims of the C standard (see C++ 18.3.1p2, [cstdint.syn]).\n" |
| 47181 | " */\n" |
| 47182 | "\n" |
| 47183 | "#ifdef __INT64_TYPE__\n" |
| 47184 | "# define INT64_MAX INT64_C( 9223372036854775807)\n" |
| 47185 | "# define INT64_MIN (-INT64_C( 9223372036854775807)-1)\n" |
| 47186 | "# define UINT64_MAX UINT64_C(18446744073709551615)\n" |
| 47187 | "# define __INT_LEAST64_MIN INT64_MIN\n" |
| 47188 | "# define __INT_LEAST64_MAX INT64_MAX\n" |
| 47189 | "# define __UINT_LEAST64_MAX UINT64_MAX\n" |
| 47190 | "# define __INT_LEAST32_MIN INT64_MIN\n" |
| 47191 | "# define __INT_LEAST32_MAX INT64_MAX\n" |
| 47192 | "# define __UINT_LEAST32_MAX UINT64_MAX\n" |
| 47193 | "# define __INT_LEAST16_MIN INT64_MIN\n" |
| 47194 | "# define __INT_LEAST16_MAX INT64_MAX\n" |
| 47195 | "# define __UINT_LEAST16_MAX UINT64_MAX\n" |
| 47196 | "# define __INT_LEAST8_MIN INT64_MIN\n" |
| 47197 | "# define __INT_LEAST8_MAX INT64_MAX\n" |
| 47198 | "# define __UINT_LEAST8_MAX UINT64_MAX\n" |
| 47199 | "#endif /* __INT64_TYPE__ */\n" |
| 47200 | "\n" |
| 47201 | "#ifdef __INT_LEAST64_MIN\n" |
| 47202 | "# define INT_LEAST64_MIN __INT_LEAST64_MIN\n" |
| 47203 | "# define INT_LEAST64_MAX __INT_LEAST64_MAX\n" |
| 47204 | "# define UINT_LEAST64_MAX __UINT_LEAST64_MAX\n" |
| 47205 | "# define INT_FAST64_MIN __INT_LEAST64_MIN\n" |
| 47206 | "# define INT_FAST64_MAX __INT_LEAST64_MAX\n" |
| 47207 | "# define UINT_FAST64_MAX __UINT_LEAST64_MAX\n" |
| 47208 | "#endif /* __INT_LEAST64_MIN */\n" |
| 47209 | "\n" |
| 47210 | "\n" |
| 47211 | "#ifdef __INT56_TYPE__\n" |
| 47212 | "# define INT56_MAX INT56_C(36028797018963967)\n" |
| 47213 | "# define INT56_MIN (-INT56_C(36028797018963967)-1)\n" |
| 47214 | "# define UINT56_MAX UINT56_C(72057594037927935)\n" |
| 47215 | "# define INT_LEAST56_MIN INT56_MIN\n" |
| 47216 | "# define INT_LEAST56_MAX INT56_MAX\n" |
| 47217 | "# define UINT_LEAST56_MAX UINT56_MAX\n" |
| 47218 | "# define INT_FAST56_MIN INT56_MIN\n" |
| 47219 | "# define INT_FAST56_MAX INT56_MAX\n" |
| 47220 | "# define UINT_FAST56_MAX UINT56_MAX\n" |
| 47221 | "# define __INT_LEAST32_MIN INT56_MIN\n" |
| 47222 | "# define __INT_LEAST32_MAX INT56_MAX\n" |
| 47223 | "# define __UINT_LEAST32_MAX UINT56_MAX\n" |
| 47224 | "# define __INT_LEAST16_MIN INT56_MIN\n" |
| 47225 | "# define __INT_LEAST16_MAX INT56_MAX\n" |
| 47226 | "# define __UINT_LEAST16_MAX UINT56_MAX\n" |
| 47227 | "# define __INT_LEAST8_MIN INT56_MIN\n" |
| 47228 | "# define __INT_LEAST8_MAX INT56_MAX\n" |
| 47229 | "# define __UINT_LEAST8_MAX UINT56_MAX\n" |
| 47230 | "#endif /* __INT56_TYPE__ */\n" |
| 47231 | "\n" |
| 47232 | "\n" |
| 47233 | "#ifdef __INT48_TYPE__\n" |
| 47234 | "# define INT48_MAX INT48_C(140737488355327)\n" |
| 47235 | "# define INT48_MIN (-INT48_C(140737488355327)-1)\n" |
| 47236 | "# define UINT48_MAX UINT48_C(281474976710655)\n" |
| 47237 | "# define INT_LEAST48_MIN INT48_MIN\n" |
| 47238 | "# define INT_LEAST48_MAX INT48_MAX\n" |
| 47239 | "# define UINT_LEAST48_MAX UINT48_MAX\n" |
| 47240 | "# define INT_FAST48_MIN INT48_MIN\n" |
| 47241 | "# define INT_FAST48_MAX INT48_MAX\n" |
| 47242 | "# define UINT_FAST48_MAX UINT48_MAX\n" |
| 47243 | "# define __INT_LEAST32_MIN INT48_MIN\n" |
| 47244 | "# define __INT_LEAST32_MAX INT48_MAX\n" |
| 47245 | "# define __UINT_LEAST32_MAX UINT48_MAX\n" |
| 47246 | "# define __INT_LEAST16_MIN INT48_MIN\n" |
| 47247 | "# define __INT_LEAST16_MAX INT48_MAX\n" |
| 47248 | "# define __UINT_LEAST16_MAX UINT48_MAX\n" |
| 47249 | "# define __INT_LEAST8_MIN INT48_MIN\n" |
| 47250 | "# define __INT_LEAST8_MAX INT48_MAX\n" |
| 47251 | "# define __UINT_LEAST8_MAX UINT48_MAX\n" |
| 47252 | "#endif /* __INT48_TYPE__ */\n" |
| 47253 | "\n" |
| 47254 | "\n" |
| 47255 | "#ifdef __INT40_TYPE__\n" |
| 47256 | "# define INT40_MAX INT40_C(549755813887)\n" |
| 47257 | "# define INT40_MIN (-INT40_C(549755813887)-1)\n" |
| 47258 | "# define UINT40_MAX UINT40_C(1099511627775)\n" |
| 47259 | "# define INT_LEAST40_MIN INT40_MIN\n" |
| 47260 | "# define INT_LEAST40_MAX INT40_MAX\n" |
| 47261 | "# define UINT_LEAST40_MAX UINT40_MAX\n" |
| 47262 | "# define INT_FAST40_MIN INT40_MIN\n" |
| 47263 | "# define INT_FAST40_MAX INT40_MAX\n" |
| 47264 | "# define UINT_FAST40_MAX UINT40_MAX\n" |
| 47265 | "# define __INT_LEAST32_MIN INT40_MIN\n" |
| 47266 | "# define __INT_LEAST32_MAX INT40_MAX\n" |
| 47267 | "# define __UINT_LEAST32_MAX UINT40_MAX\n" |
| 47268 | "# define __INT_LEAST16_MIN INT40_MIN\n" |
| 47269 | "# define __INT_LEAST16_MAX INT40_MAX\n" |
| 47270 | "# define __UINT_LEAST16_MAX UINT40_MAX\n" |
| 47271 | "# define __INT_LEAST8_MIN INT40_MIN\n" |
| 47272 | "# define __INT_LEAST8_MAX INT40_MAX\n" |
| 47273 | "# define __UINT_LEAST8_MAX UINT40_MAX\n" |
| 47274 | "#endif /* __INT40_TYPE__ */\n" |
| 47275 | "\n" |
| 47276 | "\n" |
| 47277 | "#ifdef __INT32_TYPE__\n" |
| 47278 | "# define INT32_MAX INT32_C(2147483647)\n" |
| 47279 | "# define INT32_MIN (-INT32_C(2147483647)-1)\n" |
| 47280 | "# define UINT32_MAX UINT32_C(4294967295)\n" |
| 47281 | "# define __INT_LEAST32_MIN INT32_MIN\n" |
| 47282 | "# define __INT_LEAST32_MAX INT32_MAX\n" |
| 47283 | "# define __UINT_LEAST32_MAX UINT32_MAX\n" |
| 47284 | "# define __INT_LEAST16_MIN INT32_MIN\n" |
| 47285 | "# define __INT_LEAST16_MAX INT32_MAX\n" |
| 47286 | "# define __UINT_LEAST16_MAX UINT32_MAX\n" |
| 47287 | "# define __INT_LEAST8_MIN INT32_MIN\n" |
| 47288 | "# define __INT_LEAST8_MAX INT32_MAX\n" |
| 47289 | "# define __UINT_LEAST8_MAX UINT32_MAX\n" |
| 47290 | "#endif /* __INT32_TYPE__ */\n" |
| 47291 | "\n" |
| 47292 | "#ifdef __INT_LEAST32_MIN\n" |
| 47293 | "# define INT_LEAST32_MIN __INT_LEAST32_MIN\n" |
| 47294 | "# define INT_LEAST32_MAX __INT_LEAST32_MAX\n" |
| 47295 | "# define UINT_LEAST32_MAX __UINT_LEAST32_MAX\n" |
| 47296 | "# define INT_FAST32_MIN __INT_LEAST32_MIN\n" |
| 47297 | "# define INT_FAST32_MAX __INT_LEAST32_MAX\n" |
| 47298 | "# define UINT_FAST32_MAX __UINT_LEAST32_MAX\n" |
| 47299 | "#endif /* __INT_LEAST32_MIN */\n" |
| 47300 | "\n" |
| 47301 | "\n" |
| 47302 | "#ifdef __INT24_TYPE__\n" |
| 47303 | "# define INT24_MAX INT24_C(8388607)\n" |
| 47304 | "# define INT24_MIN (-INT24_C(8388607)-1)\n" |
| 47305 | "# define UINT24_MAX UINT24_C(16777215)\n" |
| 47306 | "# define INT_LEAST24_MIN INT24_MIN\n" |
| 47307 | "# define INT_LEAST24_MAX INT24_MAX\n" |
| 47308 | "# define UINT_LEAST24_MAX UINT24_MAX\n" |
| 47309 | "# define INT_FAST24_MIN INT24_MIN\n" |
| 47310 | "# define INT_FAST24_MAX INT24_MAX\n" |
| 47311 | "# define UINT_FAST24_MAX UINT24_MAX\n" |
| 47312 | "# define __INT_LEAST16_MIN INT24_MIN\n" |
| 47313 | "# define __INT_LEAST16_MAX INT24_MAX\n" |
| 47314 | "# define __UINT_LEAST16_MAX UINT24_MAX\n" |
| 47315 | "# define __INT_LEAST8_MIN INT24_MIN\n" |
| 47316 | "# define __INT_LEAST8_MAX INT24_MAX\n" |
| 47317 | "# define __UINT_LEAST8_MAX UINT24_MAX\n" |
| 47318 | "#endif /* __INT24_TYPE__ */\n" |
| 47319 | "\n" |
| 47320 | "\n" |
| 47321 | "#ifdef __INT16_TYPE__\n" |
| 47322 | "#define INT16_MAX INT16_C(32767)\n" |
| 47323 | "#define INT16_MIN (-INT16_C(32767)-1)\n" |
| 47324 | "#define UINT16_MAX UINT16_C(65535)\n" |
| 47325 | "# define __INT_LEAST16_MIN INT16_MIN\n" |
| 47326 | "# define __INT_LEAST16_MAX INT16_MAX\n" |
| 47327 | "# define __UINT_LEAST16_MAX UINT16_MAX\n" |
| 47328 | "# define __INT_LEAST8_MIN INT16_MIN\n" |
| 47329 | "# define __INT_LEAST8_MAX INT16_MAX\n" |
| 47330 | "# define __UINT_LEAST8_MAX UINT16_MAX\n" |
| 47331 | "#endif /* __INT16_TYPE__ */\n" |
| 47332 | "\n" |
| 47333 | "#ifdef __INT_LEAST16_MIN\n" |
| 47334 | "# define INT_LEAST16_MIN __INT_LEAST16_MIN\n" |
| 47335 | "# define INT_LEAST16_MAX __INT_LEAST16_MAX\n" |
| 47336 | "# define UINT_LEAST16_MAX __UINT_LEAST16_MAX\n" |
| 47337 | "# define INT_FAST16_MIN __INT_LEAST16_MIN\n" |
| 47338 | "# define INT_FAST16_MAX __INT_LEAST16_MAX\n" |
| 47339 | "# define UINT_FAST16_MAX __UINT_LEAST16_MAX\n" |
| 47340 | "#endif /* __INT_LEAST16_MIN */\n" |
| 47341 | "\n" |
| 47342 | "\n" |
| 47343 | "#ifdef __INT8_TYPE__\n" |
| 47344 | "# define INT8_MAX INT8_C(127)\n" |
| 47345 | "# define INT8_MIN (-INT8_C(127)-1)\n" |
| 47346 | "# define UINT8_MAX UINT8_C(255)\n" |
| 47347 | "# define __INT_LEAST8_MIN INT8_MIN\n" |
| 47348 | "# define __INT_LEAST8_MAX INT8_MAX\n" |
| 47349 | "# define __UINT_LEAST8_MAX UINT8_MAX\n" |
| 47350 | "#endif /* __INT8_TYPE__ */\n" |
| 47351 | "\n" |
| 47352 | "#ifdef __INT_LEAST8_MIN\n" |
| 47353 | "# define INT_LEAST8_MIN __INT_LEAST8_MIN\n" |
| 47354 | "# define INT_LEAST8_MAX __INT_LEAST8_MAX\n" |
| 47355 | "# define UINT_LEAST8_MAX __UINT_LEAST8_MAX\n" |
| 47356 | "# define INT_FAST8_MIN __INT_LEAST8_MIN\n" |
| 47357 | "# define INT_FAST8_MAX __INT_LEAST8_MAX\n" |
| 47358 | "# define UINT_FAST8_MAX __UINT_LEAST8_MAX\n" |
| 47359 | "#endif /* __INT_LEAST8_MIN */\n" |
| 47360 | "\n" |
| 47361 | "/* Some utility macros */\n" |
| 47362 | "#define __INTN_MIN(n) __stdint_join3( INT, n, _MIN)\n" |
| 47363 | "#define __INTN_MAX(n) __stdint_join3( INT, n, _MAX)\n" |
| 47364 | "#define __UINTN_MAX(n) __stdint_join3(UINT, n, _MAX)\n" |
| 47365 | "#define __INTN_C(n, v) __stdint_join3( INT, n, _C(v))\n" |
| 47366 | "#define __UINTN_C(n, v) __stdint_join3(UINT, n, _C(v))\n" |
| 47367 | "\n" |
| 47368 | "/* C99 7.18.2.4 Limits of integer types capable of holding object pointers. */\n" |
| 47369 | "/* C99 7.18.3 Limits of other integer types. */\n" |
| 47370 | "\n" |
| 47371 | "#define INTPTR_MIN (-__INTPTR_MAX__-1)\n" |
| 47372 | "#define INTPTR_MAX __INTPTR_MAX__\n" |
| 47373 | "#define UINTPTR_MAX __UINTPTR_MAX__\n" |
| 47374 | "#define PTRDIFF_MIN (-__PTRDIFF_MAX__-1)\n" |
| 47375 | "#define PTRDIFF_MAX __PTRDIFF_MAX__\n" |
| 47376 | "#define SIZE_MAX __SIZE_MAX__\n" |
| 47377 | "\n" |
| 47378 | "/* ISO9899:2011 7.20 (C11 Annex K): Define RSIZE_MAX if __STDC_WANT_LIB_EXT1__\n" |
| 47379 | " * is enabled. */\n" |
| 47380 | "#if defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1\n" |
| 47381 | "#define RSIZE_MAX (SIZE_MAX >> 1)\n" |
| 47382 | "#endif\n" |
| 47383 | "\n" |
| 47384 | "/* C99 7.18.2.5 Limits of greatest-width integer types. */\n" |
| 47385 | "#define INTMAX_MIN (-__INTMAX_MAX__-1)\n" |
| 47386 | "#define INTMAX_MAX __INTMAX_MAX__\n" |
| 47387 | "#define UINTMAX_MAX __UINTMAX_MAX__\n" |
| 47388 | "\n" |
| 47389 | "/* C99 7.18.3 Limits of other integer types. */\n" |
| 47390 | "#define SIG_ATOMIC_MIN __INTN_MIN(__SIG_ATOMIC_WIDTH__)\n" |
| 47391 | "#define SIG_ATOMIC_MAX __INTN_MAX(__SIG_ATOMIC_WIDTH__)\n" |
| 47392 | "#ifdef __WINT_UNSIGNED__\n" |
| 47393 | "# define WINT_MIN __UINTN_C(__WINT_WIDTH__, 0)\n" |
| 47394 | "# define WINT_MAX __UINTN_MAX(__WINT_WIDTH__)\n" |
| 47395 | "#else\n" |
| 47396 | "# define WINT_MIN __INTN_MIN(__WINT_WIDTH__)\n" |
| 47397 | "# define WINT_MAX __INTN_MAX(__WINT_WIDTH__)\n" |
| 47398 | "#endif\n" |
| 47399 | "\n" |
| 47400 | "#ifndef WCHAR_MAX\n" |
| 47401 | "# define WCHAR_MAX __WCHAR_MAX__\n" |
| 47402 | "#endif\n" |
| 47403 | "#ifndef WCHAR_MIN\n" |
| 47404 | "# if __WCHAR_MAX__ == __INTN_MAX(__WCHAR_WIDTH__)\n" |
| 47405 | "# define WCHAR_MIN __INTN_MIN(__WCHAR_WIDTH__)\n" |
| 47406 | "# else\n" |
| 47407 | "# define WCHAR_MIN __UINTN_C(__WCHAR_WIDTH__, 0)\n" |
| 47408 | "# endif\n" |
| 47409 | "#endif\n" |
| 47410 | "\n" |
| 47411 | "/* 7.18.4.2 Macros for greatest-width integer constants. */\n" |
| 47412 | "#define INTMAX_C(v) __int_c(v, __INTMAX_C_SUFFIX__)\n" |
| 47413 | "#define UINTMAX_C(v) __int_c(v, __UINTMAX_C_SUFFIX__)\n" |
| 47414 | "\n" |
| 47415 | "#endif /* __CLANG_STDINT_H2 */\n" |
| 47416 | "#endif /* __STDC_HOSTED__ */\n" |
| 47417 | "" } , |
| 47418 | { "/builtins/stdnoreturn.h" , "/*===---- stdnoreturn.h - Standard header for noreturn macro ---------------===\n" |
| 47419 | " *\n" |
| 47420 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 47421 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 47422 | " * in the Software without restriction, including without limitation the rights\n" |
| 47423 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 47424 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 47425 | " * furnished to do so, subject to the following conditions:\n" |
| 47426 | " *\n" |
| 47427 | " * The above copyright notice and this permission notice shall be included in\n" |
| 47428 | " * all copies or substantial portions of the Software.\n" |
| 47429 | " *\n" |
| 47430 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 47431 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 47432 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 47433 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 47434 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 47435 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 47436 | " * THE SOFTWARE.\n" |
| 47437 | " *\n" |
| 47438 | " *===-----------------------------------------------------------------------===\n" |
| 47439 | " */\n" |
| 47440 | "\n" |
| 47441 | "#ifndef __STDNORETURN_H\n" |
| 47442 | "#define __STDNORETURN_H\n" |
| 47443 | "\n" |
| 47444 | "#define noreturn _Noreturn\n" |
| 47445 | "#define __noreturn_is_defined 1\n" |
| 47446 | "\n" |
| 47447 | "#endif /* __STDNORETURN_H */\n" |
| 47448 | "" } , |
| 47449 | { "/builtins/tbmintrin.h" , "/*===---- tbmintrin.h - TBM intrinsics -------------------------------------===\n" |
| 47450 | " *\n" |
| 47451 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 47452 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 47453 | " * in the Software without restriction, including without limitation the rights\n" |
| 47454 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 47455 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 47456 | " * furnished to do so, subject to the following conditions:\n" |
| 47457 | " *\n" |
| 47458 | " * The above copyright notice and this permission notice shall be included in\n" |
| 47459 | " * all copies or substantial portions of the Software.\n" |
| 47460 | " *\n" |
| 47461 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 47462 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 47463 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 47464 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 47465 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 47466 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 47467 | " * THE SOFTWARE.\n" |
| 47468 | " *\n" |
| 47469 | " *===-----------------------------------------------------------------------===\n" |
| 47470 | " */\n" |
| 47471 | "\n" |
| 47472 | "#ifndef __X86INTRIN_H\n" |
| 47473 | "#error \"Never use <tbmintrin.h> directly; include <x86intrin.h> instead.\"\n" |
| 47474 | "#endif\n" |
| 47475 | "\n" |
| 47476 | "#ifndef __TBMINTRIN_H\n" |
| 47477 | "#define __TBMINTRIN_H\n" |
| 47478 | "\n" |
| 47479 | "/* Define the default attributes for the functions in this file. */\n" |
| 47480 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"tbm\")))\n" |
| 47481 | "\n" |
| 47482 | "#define __bextri_u32(a, b) \\\n" |
| 47483 | " ((unsigned int)__builtin_ia32_bextri_u32((unsigned int)(a), \\\n" |
| 47484 | " (unsigned int)(b)))\n" |
| 47485 | "\n" |
| 47486 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
| 47487 | "__blcfill_u32(unsigned int __a)\n" |
| 47488 | "{\n" |
| 47489 | " return __a & (__a + 1);\n" |
| 47490 | "}\n" |
| 47491 | "\n" |
| 47492 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
| 47493 | "__blci_u32(unsigned int __a)\n" |
| 47494 | "{\n" |
| 47495 | " return __a | ~(__a + 1);\n" |
| 47496 | "}\n" |
| 47497 | "\n" |
| 47498 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
| 47499 | "__blcic_u32(unsigned int __a)\n" |
| 47500 | "{\n" |
| 47501 | " return ~__a & (__a + 1);\n" |
| 47502 | "}\n" |
| 47503 | "\n" |
| 47504 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
| 47505 | "__blcmsk_u32(unsigned int __a)\n" |
| 47506 | "{\n" |
| 47507 | " return __a ^ (__a + 1);\n" |
| 47508 | "}\n" |
| 47509 | "\n" |
| 47510 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
| 47511 | "__blcs_u32(unsigned int __a)\n" |
| 47512 | "{\n" |
| 47513 | " return __a | (__a + 1);\n" |
| 47514 | "}\n" |
| 47515 | "\n" |
| 47516 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
| 47517 | "__blsfill_u32(unsigned int __a)\n" |
| 47518 | "{\n" |
| 47519 | " return __a | (__a - 1);\n" |
| 47520 | "}\n" |
| 47521 | "\n" |
| 47522 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
| 47523 | "__blsic_u32(unsigned int __a)\n" |
| 47524 | "{\n" |
| 47525 | " return ~__a | (__a - 1);\n" |
| 47526 | "}\n" |
| 47527 | "\n" |
| 47528 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
| 47529 | "__t1mskc_u32(unsigned int __a)\n" |
| 47530 | "{\n" |
| 47531 | " return ~__a | (__a + 1);\n" |
| 47532 | "}\n" |
| 47533 | "\n" |
| 47534 | "static __inline__ unsigned int __DEFAULT_FN_ATTRS\n" |
| 47535 | "__tzmsk_u32(unsigned int __a)\n" |
| 47536 | "{\n" |
| 47537 | " return ~__a & (__a - 1);\n" |
| 47538 | "}\n" |
| 47539 | "\n" |
| 47540 | "#ifdef __x86_64__\n" |
| 47541 | "#define __bextri_u64(a, b) \\\n" |
| 47542 | " ((unsigned long long)__builtin_ia32_bextri_u64((unsigned long long)(a), \\\n" |
| 47543 | " (unsigned long long)(b)))\n" |
| 47544 | "\n" |
| 47545 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
| 47546 | "__blcfill_u64(unsigned long long __a)\n" |
| 47547 | "{\n" |
| 47548 | " return __a & (__a + 1);\n" |
| 47549 | "}\n" |
| 47550 | "\n" |
| 47551 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
| 47552 | "__blci_u64(unsigned long long __a)\n" |
| 47553 | "{\n" |
| 47554 | " return __a | ~(__a + 1);\n" |
| 47555 | "}\n" |
| 47556 | "\n" |
| 47557 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
| 47558 | "__blcic_u64(unsigned long long __a)\n" |
| 47559 | "{\n" |
| 47560 | " return ~__a & (__a + 1);\n" |
| 47561 | "}\n" |
| 47562 | "\n" |
| 47563 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
| 47564 | "__blcmsk_u64(unsigned long long __a)\n" |
| 47565 | "{\n" |
| 47566 | " return __a ^ (__a + 1);\n" |
| 47567 | "}\n" |
| 47568 | "\n" |
| 47569 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
| 47570 | "__blcs_u64(unsigned long long __a)\n" |
| 47571 | "{\n" |
| 47572 | " return __a | (__a + 1);\n" |
| 47573 | "}\n" |
| 47574 | "\n" |
| 47575 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
| 47576 | "__blsfill_u64(unsigned long long __a)\n" |
| 47577 | "{\n" |
| 47578 | " return __a | (__a - 1);\n" |
| 47579 | "}\n" |
| 47580 | "\n" |
| 47581 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
| 47582 | "__blsic_u64(unsigned long long __a)\n" |
| 47583 | "{\n" |
| 47584 | " return ~__a | (__a - 1);\n" |
| 47585 | "}\n" |
| 47586 | "\n" |
| 47587 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
| 47588 | "__t1mskc_u64(unsigned long long __a)\n" |
| 47589 | "{\n" |
| 47590 | " return ~__a | (__a + 1);\n" |
| 47591 | "}\n" |
| 47592 | "\n" |
| 47593 | "static __inline__ unsigned long long __DEFAULT_FN_ATTRS\n" |
| 47594 | "__tzmsk_u64(unsigned long long __a)\n" |
| 47595 | "{\n" |
| 47596 | " return ~__a & (__a - 1);\n" |
| 47597 | "}\n" |
| 47598 | "#endif\n" |
| 47599 | "\n" |
| 47600 | "#undef __DEFAULT_FN_ATTRS\n" |
| 47601 | "\n" |
| 47602 | "#endif /* __TBMINTRIN_H */\n" |
| 47603 | "" } , |
| 47604 | { "/builtins/tgmath.h" , "/*===---- tgmath.h - Standard header for type generic math ----------------===*\\\n" |
| 47605 | " *\n" |
| 47606 | " * Copyright (c) 2009 Howard Hinnant\n" |
| 47607 | " *\n" |
| 47608 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 47609 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 47610 | " * in the Software without restriction, including without limitation the rights\n" |
| 47611 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 47612 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 47613 | " * furnished to do so, subject to the following conditions:\n" |
| 47614 | " *\n" |
| 47615 | " * The above copyright notice and this permission notice shall be included in\n" |
| 47616 | " * all copies or substantial portions of the Software.\n" |
| 47617 | " *\n" |
| 47618 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 47619 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 47620 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 47621 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 47622 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 47623 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 47624 | " * THE SOFTWARE.\n" |
| 47625 | " *\n" |
| 47626 | "\\*===----------------------------------------------------------------------===*/\n" |
| 47627 | "\n" |
| 47628 | "#ifndef __CLANG_TGMATH_H\n" |
| 47629 | "#define __CLANG_TGMATH_H\n" |
| 47630 | "\n" |
| 47631 | "/* C99 7.22 Type-generic math <tgmath.h>. */\n" |
| 47632 | "#include <math.h>\n" |
| 47633 | "\n" |
| 47634 | "/*\n" |
| 47635 | " * Allow additional definitions and implementation-defined values on Apple\n" |
| 47636 | " * platforms. This is done after #include <math.h> to avoid depcycle conflicts\n" |
| 47637 | " * between libcxx and darwin in C++ modules builds.\n" |
| 47638 | " */\n" |
| 47639 | "#if defined(__APPLE__) && __STDC_HOSTED__ && __has_include_next(<tgmath.h>)\n" |
| 47640 | "# include_next <tgmath.h>\n" |
| 47641 | "#else\n" |
| 47642 | "\n" |
| 47643 | "/* C++ handles type genericity with overloading in math.h. */\n" |
| 47644 | "#ifndef __cplusplus\n" |
| 47645 | "#include <complex.h>\n" |
| 47646 | "\n" |
| 47647 | "#define _TG_ATTRSp __attribute__((__overloadable__))\n" |
| 47648 | "#define _TG_ATTRS __attribute__((__overloadable__, __always_inline__))\n" |
| 47649 | "\n" |
| 47650 | "// promotion\n" |
| 47651 | "\n" |
| 47652 | "typedef void _Argument_type_is_not_arithmetic;\n" |
| 47653 | "static _Argument_type_is_not_arithmetic __tg_promote(...)\n" |
| 47654 | " __attribute__((__unavailable__,__overloadable__));\n" |
| 47655 | "static double _TG_ATTRSp __tg_promote(int);\n" |
| 47656 | "static double _TG_ATTRSp __tg_promote(unsigned int);\n" |
| 47657 | "static double _TG_ATTRSp __tg_promote(long);\n" |
| 47658 | "static double _TG_ATTRSp __tg_promote(unsigned long);\n" |
| 47659 | "static double _TG_ATTRSp __tg_promote(long long);\n" |
| 47660 | "static double _TG_ATTRSp __tg_promote(unsigned long long);\n" |
| 47661 | "static float _TG_ATTRSp __tg_promote(float);\n" |
| 47662 | "static double _TG_ATTRSp __tg_promote(double);\n" |
| 47663 | "static long double _TG_ATTRSp __tg_promote(long double);\n" |
| 47664 | "static float _Complex _TG_ATTRSp __tg_promote(float _Complex);\n" |
| 47665 | "static double _Complex _TG_ATTRSp __tg_promote(double _Complex);\n" |
| 47666 | "static long double _Complex _TG_ATTRSp __tg_promote(long double _Complex);\n" |
| 47667 | "\n" |
| 47668 | "#define __tg_promote1(__x) (__typeof__(__tg_promote(__x)))\n" |
| 47669 | "#define __tg_promote2(__x, __y) (__typeof__(__tg_promote(__x) + \\\n" |
| 47670 | " __tg_promote(__y)))\n" |
| 47671 | "#define __tg_promote3(__x, __y, __z) (__typeof__(__tg_promote(__x) + \\\n" |
| 47672 | " __tg_promote(__y) + \\\n" |
| 47673 | " __tg_promote(__z)))\n" |
| 47674 | "\n" |
| 47675 | "// acos\n" |
| 47676 | "\n" |
| 47677 | "static float\n" |
| 47678 | " _TG_ATTRS\n" |
| 47679 | " __tg_acos(float __x) {return acosf(__x);}\n" |
| 47680 | "\n" |
| 47681 | "static double\n" |
| 47682 | " _TG_ATTRS\n" |
| 47683 | " __tg_acos(double __x) {return acos(__x);}\n" |
| 47684 | "\n" |
| 47685 | "static long double\n" |
| 47686 | " _TG_ATTRS\n" |
| 47687 | " __tg_acos(long double __x) {return acosl(__x);}\n" |
| 47688 | "\n" |
| 47689 | "static float _Complex\n" |
| 47690 | " _TG_ATTRS\n" |
| 47691 | " __tg_acos(float _Complex __x) {return cacosf(__x);}\n" |
| 47692 | "\n" |
| 47693 | "static double _Complex\n" |
| 47694 | " _TG_ATTRS\n" |
| 47695 | " __tg_acos(double _Complex __x) {return cacos(__x);}\n" |
| 47696 | "\n" |
| 47697 | "static long double _Complex\n" |
| 47698 | " _TG_ATTRS\n" |
| 47699 | " __tg_acos(long double _Complex __x) {return cacosl(__x);}\n" |
| 47700 | "\n" |
| 47701 | "#undef acos\n" |
| 47702 | "#define acos(__x) __tg_acos(__tg_promote1((__x))(__x))\n" |
| 47703 | "\n" |
| 47704 | "// asin\n" |
| 47705 | "\n" |
| 47706 | "static float\n" |
| 47707 | " _TG_ATTRS\n" |
| 47708 | " __tg_asin(float __x) {return asinf(__x);}\n" |
| 47709 | "\n" |
| 47710 | "static double\n" |
| 47711 | " _TG_ATTRS\n" |
| 47712 | " __tg_asin(double __x) {return asin(__x);}\n" |
| 47713 | "\n" |
| 47714 | "static long double\n" |
| 47715 | " _TG_ATTRS\n" |
| 47716 | " __tg_asin(long double __x) {return asinl(__x);}\n" |
| 47717 | "\n" |
| 47718 | "static float _Complex\n" |
| 47719 | " _TG_ATTRS\n" |
| 47720 | " __tg_asin(float _Complex __x) {return casinf(__x);}\n" |
| 47721 | "\n" |
| 47722 | "static double _Complex\n" |
| 47723 | " _TG_ATTRS\n" |
| 47724 | " __tg_asin(double _Complex __x) {return casin(__x);}\n" |
| 47725 | "\n" |
| 47726 | "static long double _Complex\n" |
| 47727 | " _TG_ATTRS\n" |
| 47728 | " __tg_asin(long double _Complex __x) {return casinl(__x);}\n" |
| 47729 | "\n" |
| 47730 | "#undef asin\n" |
| 47731 | "#define asin(__x) __tg_asin(__tg_promote1((__x))(__x))\n" |
| 47732 | "\n" |
| 47733 | "// atan\n" |
| 47734 | "\n" |
| 47735 | "static float\n" |
| 47736 | " _TG_ATTRS\n" |
| 47737 | " __tg_atan(float __x) {return atanf(__x);}\n" |
| 47738 | "\n" |
| 47739 | "static double\n" |
| 47740 | " _TG_ATTRS\n" |
| 47741 | " __tg_atan(double __x) {return atan(__x);}\n" |
| 47742 | "\n" |
| 47743 | "static long double\n" |
| 47744 | " _TG_ATTRS\n" |
| 47745 | " __tg_atan(long double __x) {return atanl(__x);}\n" |
| 47746 | "\n" |
| 47747 | "static float _Complex\n" |
| 47748 | " _TG_ATTRS\n" |
| 47749 | " __tg_atan(float _Complex __x) {return catanf(__x);}\n" |
| 47750 | "\n" |
| 47751 | "static double _Complex\n" |
| 47752 | " _TG_ATTRS\n" |
| 47753 | " __tg_atan(double _Complex __x) {return catan(__x);}\n" |
| 47754 | "\n" |
| 47755 | "static long double _Complex\n" |
| 47756 | " _TG_ATTRS\n" |
| 47757 | " __tg_atan(long double _Complex __x) {return catanl(__x);}\n" |
| 47758 | "\n" |
| 47759 | "#undef atan\n" |
| 47760 | "#define atan(__x) __tg_atan(__tg_promote1((__x))(__x))\n" |
| 47761 | "\n" |
| 47762 | "// acosh\n" |
| 47763 | "\n" |
| 47764 | "static float\n" |
| 47765 | " _TG_ATTRS\n" |
| 47766 | " __tg_acosh(float __x) {return acoshf(__x);}\n" |
| 47767 | "\n" |
| 47768 | "static double\n" |
| 47769 | " _TG_ATTRS\n" |
| 47770 | " __tg_acosh(double __x) {return acosh(__x);}\n" |
| 47771 | "\n" |
| 47772 | "static long double\n" |
| 47773 | " _TG_ATTRS\n" |
| 47774 | " __tg_acosh(long double __x) {return acoshl(__x);}\n" |
| 47775 | "\n" |
| 47776 | "static float _Complex\n" |
| 47777 | " _TG_ATTRS\n" |
| 47778 | " __tg_acosh(float _Complex __x) {return cacoshf(__x);}\n" |
| 47779 | "\n" |
| 47780 | "static double _Complex\n" |
| 47781 | " _TG_ATTRS\n" |
| 47782 | " __tg_acosh(double _Complex __x) {return cacosh(__x);}\n" |
| 47783 | "\n" |
| 47784 | "static long double _Complex\n" |
| 47785 | " _TG_ATTRS\n" |
| 47786 | " __tg_acosh(long double _Complex __x) {return cacoshl(__x);}\n" |
| 47787 | "\n" |
| 47788 | "#undef acosh\n" |
| 47789 | "#define acosh(__x) __tg_acosh(__tg_promote1((__x))(__x))\n" |
| 47790 | "\n" |
| 47791 | "// asinh\n" |
| 47792 | "\n" |
| 47793 | "static float\n" |
| 47794 | " _TG_ATTRS\n" |
| 47795 | " __tg_asinh(float __x) {return asinhf(__x);}\n" |
| 47796 | "\n" |
| 47797 | "static double\n" |
| 47798 | " _TG_ATTRS\n" |
| 47799 | " __tg_asinh(double __x) {return asinh(__x);}\n" |
| 47800 | "\n" |
| 47801 | "static long double\n" |
| 47802 | " _TG_ATTRS\n" |
| 47803 | " __tg_asinh(long double __x) {return asinhl(__x);}\n" |
| 47804 | "\n" |
| 47805 | "static float _Complex\n" |
| 47806 | " _TG_ATTRS\n" |
| 47807 | " __tg_asinh(float _Complex __x) {return casinhf(__x);}\n" |
| 47808 | "\n" |
| 47809 | "static double _Complex\n" |
| 47810 | " _TG_ATTRS\n" |
| 47811 | " __tg_asinh(double _Complex __x) {return casinh(__x);}\n" |
| 47812 | "\n" |
| 47813 | "static long double _Complex\n" |
| 47814 | " _TG_ATTRS\n" |
| 47815 | " __tg_asinh(long double _Complex __x) {return casinhl(__x);}\n" |
| 47816 | "\n" |
| 47817 | "#undef asinh\n" |
| 47818 | "#define asinh(__x) __tg_asinh(__tg_promote1((__x))(__x))\n" |
| 47819 | "\n" |
| 47820 | "// atanh\n" |
| 47821 | "\n" |
| 47822 | "static float\n" |
| 47823 | " _TG_ATTRS\n" |
| 47824 | " __tg_atanh(float __x) {return atanhf(__x);}\n" |
| 47825 | "\n" |
| 47826 | "static double\n" |
| 47827 | " _TG_ATTRS\n" |
| 47828 | " __tg_atanh(double __x) {return atanh(__x);}\n" |
| 47829 | "\n" |
| 47830 | "static long double\n" |
| 47831 | " _TG_ATTRS\n" |
| 47832 | " __tg_atanh(long double __x) {return atanhl(__x);}\n" |
| 47833 | "\n" |
| 47834 | "static float _Complex\n" |
| 47835 | " _TG_ATTRS\n" |
| 47836 | " __tg_atanh(float _Complex __x) {return catanhf(__x);}\n" |
| 47837 | "\n" |
| 47838 | "static double _Complex\n" |
| 47839 | " _TG_ATTRS\n" |
| 47840 | " __tg_atanh(double _Complex __x) {return catanh(__x);}\n" |
| 47841 | "\n" |
| 47842 | "static long double _Complex\n" |
| 47843 | " _TG_ATTRS\n" |
| 47844 | " __tg_atanh(long double _Complex __x) {return catanhl(__x);}\n" |
| 47845 | "\n" |
| 47846 | "#undef atanh\n" |
| 47847 | "#define atanh(__x) __tg_atanh(__tg_promote1((__x))(__x))\n" |
| 47848 | "\n" |
| 47849 | "// cos\n" |
| 47850 | "\n" |
| 47851 | "static float\n" |
| 47852 | " _TG_ATTRS\n" |
| 47853 | " __tg_cos(float __x) {return cosf(__x);}\n" |
| 47854 | "\n" |
| 47855 | "static double\n" |
| 47856 | " _TG_ATTRS\n" |
| 47857 | " __tg_cos(double __x) {return cos(__x);}\n" |
| 47858 | "\n" |
| 47859 | "static long double\n" |
| 47860 | " _TG_ATTRS\n" |
| 47861 | " __tg_cos(long double __x) {return cosl(__x);}\n" |
| 47862 | "\n" |
| 47863 | "static float _Complex\n" |
| 47864 | " _TG_ATTRS\n" |
| 47865 | " __tg_cos(float _Complex __x) {return ccosf(__x);}\n" |
| 47866 | "\n" |
| 47867 | "static double _Complex\n" |
| 47868 | " _TG_ATTRS\n" |
| 47869 | " __tg_cos(double _Complex __x) {return ccos(__x);}\n" |
| 47870 | "\n" |
| 47871 | "static long double _Complex\n" |
| 47872 | " _TG_ATTRS\n" |
| 47873 | " __tg_cos(long double _Complex __x) {return ccosl(__x);}\n" |
| 47874 | "\n" |
| 47875 | "#undef cos\n" |
| 47876 | "#define cos(__x) __tg_cos(__tg_promote1((__x))(__x))\n" |
| 47877 | "\n" |
| 47878 | "// sin\n" |
| 47879 | "\n" |
| 47880 | "static float\n" |
| 47881 | " _TG_ATTRS\n" |
| 47882 | " __tg_sin(float __x) {return sinf(__x);}\n" |
| 47883 | "\n" |
| 47884 | "static double\n" |
| 47885 | " _TG_ATTRS\n" |
| 47886 | " __tg_sin(double __x) {return sin(__x);}\n" |
| 47887 | "\n" |
| 47888 | "static long double\n" |
| 47889 | " _TG_ATTRS\n" |
| 47890 | " __tg_sin(long double __x) {return sinl(__x);}\n" |
| 47891 | "\n" |
| 47892 | "static float _Complex\n" |
| 47893 | " _TG_ATTRS\n" |
| 47894 | " __tg_sin(float _Complex __x) {return csinf(__x);}\n" |
| 47895 | "\n" |
| 47896 | "static double _Complex\n" |
| 47897 | " _TG_ATTRS\n" |
| 47898 | " __tg_sin(double _Complex __x) {return csin(__x);}\n" |
| 47899 | "\n" |
| 47900 | "static long double _Complex\n" |
| 47901 | " _TG_ATTRS\n" |
| 47902 | " __tg_sin(long double _Complex __x) {return csinl(__x);}\n" |
| 47903 | "\n" |
| 47904 | "#undef sin\n" |
| 47905 | "#define sin(__x) __tg_sin(__tg_promote1((__x))(__x))\n" |
| 47906 | "\n" |
| 47907 | "// tan\n" |
| 47908 | "\n" |
| 47909 | "static float\n" |
| 47910 | " _TG_ATTRS\n" |
| 47911 | " __tg_tan(float __x) {return tanf(__x);}\n" |
| 47912 | "\n" |
| 47913 | "static double\n" |
| 47914 | " _TG_ATTRS\n" |
| 47915 | " __tg_tan(double __x) {return tan(__x);}\n" |
| 47916 | "\n" |
| 47917 | "static long double\n" |
| 47918 | " _TG_ATTRS\n" |
| 47919 | " __tg_tan(long double __x) {return tanl(__x);}\n" |
| 47920 | "\n" |
| 47921 | "static float _Complex\n" |
| 47922 | " _TG_ATTRS\n" |
| 47923 | " __tg_tan(float _Complex __x) {return ctanf(__x);}\n" |
| 47924 | "\n" |
| 47925 | "static double _Complex\n" |
| 47926 | " _TG_ATTRS\n" |
| 47927 | " __tg_tan(double _Complex __x) {return ctan(__x);}\n" |
| 47928 | "\n" |
| 47929 | "static long double _Complex\n" |
| 47930 | " _TG_ATTRS\n" |
| 47931 | " __tg_tan(long double _Complex __x) {return ctanl(__x);}\n" |
| 47932 | "\n" |
| 47933 | "#undef tan\n" |
| 47934 | "#define tan(__x) __tg_tan(__tg_promote1((__x))(__x))\n" |
| 47935 | "\n" |
| 47936 | "// cosh\n" |
| 47937 | "\n" |
| 47938 | "static float\n" |
| 47939 | " _TG_ATTRS\n" |
| 47940 | " __tg_cosh(float __x) {return coshf(__x);}\n" |
| 47941 | "\n" |
| 47942 | "static double\n" |
| 47943 | " _TG_ATTRS\n" |
| 47944 | " __tg_cosh(double __x) {return cosh(__x);}\n" |
| 47945 | "\n" |
| 47946 | "static long double\n" |
| 47947 | " _TG_ATTRS\n" |
| 47948 | " __tg_cosh(long double __x) {return coshl(__x);}\n" |
| 47949 | "\n" |
| 47950 | "static float _Complex\n" |
| 47951 | " _TG_ATTRS\n" |
| 47952 | " __tg_cosh(float _Complex __x) {return ccoshf(__x);}\n" |
| 47953 | "\n" |
| 47954 | "static double _Complex\n" |
| 47955 | " _TG_ATTRS\n" |
| 47956 | " __tg_cosh(double _Complex __x) {return ccosh(__x);}\n" |
| 47957 | "\n" |
| 47958 | "static long double _Complex\n" |
| 47959 | " _TG_ATTRS\n" |
| 47960 | " __tg_cosh(long double _Complex __x) {return ccoshl(__x);}\n" |
| 47961 | "\n" |
| 47962 | "#undef cosh\n" |
| 47963 | "#define cosh(__x) __tg_cosh(__tg_promote1((__x))(__x))\n" |
| 47964 | "\n" |
| 47965 | "// sinh\n" |
| 47966 | "\n" |
| 47967 | "static float\n" |
| 47968 | " _TG_ATTRS\n" |
| 47969 | " __tg_sinh(float __x) {return sinhf(__x);}\n" |
| 47970 | "\n" |
| 47971 | "static double\n" |
| 47972 | " _TG_ATTRS\n" |
| 47973 | " __tg_sinh(double __x) {return sinh(__x);}\n" |
| 47974 | "\n" |
| 47975 | "static long double\n" |
| 47976 | " _TG_ATTRS\n" |
| 47977 | " __tg_sinh(long double __x) {return sinhl(__x);}\n" |
| 47978 | "\n" |
| 47979 | "static float _Complex\n" |
| 47980 | " _TG_ATTRS\n" |
| 47981 | " __tg_sinh(float _Complex __x) {return csinhf(__x);}\n" |
| 47982 | "\n" |
| 47983 | "static double _Complex\n" |
| 47984 | " _TG_ATTRS\n" |
| 47985 | " __tg_sinh(double _Complex __x) {return csinh(__x);}\n" |
| 47986 | "\n" |
| 47987 | "static long double _Complex\n" |
| 47988 | " _TG_ATTRS\n" |
| 47989 | " __tg_sinh(long double _Complex __x) {return csinhl(__x);}\n" |
| 47990 | "\n" |
| 47991 | "#undef sinh\n" |
| 47992 | "#define sinh(__x) __tg_sinh(__tg_promote1((__x))(__x))\n" |
| 47993 | "\n" |
| 47994 | "// tanh\n" |
| 47995 | "\n" |
| 47996 | "static float\n" |
| 47997 | " _TG_ATTRS\n" |
| 47998 | " __tg_tanh(float __x) {return tanhf(__x);}\n" |
| 47999 | "\n" |
| 48000 | "static double\n" |
| 48001 | " _TG_ATTRS\n" |
| 48002 | " __tg_tanh(double __x) {return tanh(__x);}\n" |
| 48003 | "\n" |
| 48004 | "static long double\n" |
| 48005 | " _TG_ATTRS\n" |
| 48006 | " __tg_tanh(long double __x) {return tanhl(__x);}\n" |
| 48007 | "\n" |
| 48008 | "static float _Complex\n" |
| 48009 | " _TG_ATTRS\n" |
| 48010 | " __tg_tanh(float _Complex __x) {return ctanhf(__x);}\n" |
| 48011 | "\n" |
| 48012 | "static double _Complex\n" |
| 48013 | " _TG_ATTRS\n" |
| 48014 | " __tg_tanh(double _Complex __x) {return ctanh(__x);}\n" |
| 48015 | "\n" |
| 48016 | "static long double _Complex\n" |
| 48017 | " _TG_ATTRS\n" |
| 48018 | " __tg_tanh(long double _Complex __x) {return ctanhl(__x);}\n" |
| 48019 | "\n" |
| 48020 | "#undef tanh\n" |
| 48021 | "#define tanh(__x) __tg_tanh(__tg_promote1((__x))(__x))\n" |
| 48022 | "\n" |
| 48023 | "// exp\n" |
| 48024 | "\n" |
| 48025 | "static float\n" |
| 48026 | " _TG_ATTRS\n" |
| 48027 | " __tg_exp(float __x) {return expf(__x);}\n" |
| 48028 | "\n" |
| 48029 | "static double\n" |
| 48030 | " _TG_ATTRS\n" |
| 48031 | " __tg_exp(double __x) {return exp(__x);}\n" |
| 48032 | "\n" |
| 48033 | "static long double\n" |
| 48034 | " _TG_ATTRS\n" |
| 48035 | " __tg_exp(long double __x) {return expl(__x);}\n" |
| 48036 | "\n" |
| 48037 | "static float _Complex\n" |
| 48038 | " _TG_ATTRS\n" |
| 48039 | " __tg_exp(float _Complex __x) {return cexpf(__x);}\n" |
| 48040 | "\n" |
| 48041 | "static double _Complex\n" |
| 48042 | " _TG_ATTRS\n" |
| 48043 | " __tg_exp(double _Complex __x) {return cexp(__x);}\n" |
| 48044 | "\n" |
| 48045 | "static long double _Complex\n" |
| 48046 | " _TG_ATTRS\n" |
| 48047 | " __tg_exp(long double _Complex __x) {return cexpl(__x);}\n" |
| 48048 | "\n" |
| 48049 | "#undef exp\n" |
| 48050 | "#define exp(__x) __tg_exp(__tg_promote1((__x))(__x))\n" |
| 48051 | "\n" |
| 48052 | "// log\n" |
| 48053 | "\n" |
| 48054 | "static float\n" |
| 48055 | " _TG_ATTRS\n" |
| 48056 | " __tg_log(float __x) {return logf(__x);}\n" |
| 48057 | "\n" |
| 48058 | "static double\n" |
| 48059 | " _TG_ATTRS\n" |
| 48060 | " __tg_log(double __x) {return log(__x);}\n" |
| 48061 | "\n" |
| 48062 | "static long double\n" |
| 48063 | " _TG_ATTRS\n" |
| 48064 | " __tg_log(long double __x) {return logl(__x);}\n" |
| 48065 | "\n" |
| 48066 | "static float _Complex\n" |
| 48067 | " _TG_ATTRS\n" |
| 48068 | " __tg_log(float _Complex __x) {return clogf(__x);}\n" |
| 48069 | "\n" |
| 48070 | "static double _Complex\n" |
| 48071 | " _TG_ATTRS\n" |
| 48072 | " __tg_log(double _Complex __x) {return clog(__x);}\n" |
| 48073 | "\n" |
| 48074 | "static long double _Complex\n" |
| 48075 | " _TG_ATTRS\n" |
| 48076 | " __tg_log(long double _Complex __x) {return clogl(__x);}\n" |
| 48077 | "\n" |
| 48078 | "#undef log\n" |
| 48079 | "#define log(__x) __tg_log(__tg_promote1((__x))(__x))\n" |
| 48080 | "\n" |
| 48081 | "// pow\n" |
| 48082 | "\n" |
| 48083 | "static float\n" |
| 48084 | " _TG_ATTRS\n" |
| 48085 | " __tg_pow(float __x, float __y) {return powf(__x, __y);}\n" |
| 48086 | "\n" |
| 48087 | "static double\n" |
| 48088 | " _TG_ATTRS\n" |
| 48089 | " __tg_pow(double __x, double __y) {return pow(__x, __y);}\n" |
| 48090 | "\n" |
| 48091 | "static long double\n" |
| 48092 | " _TG_ATTRS\n" |
| 48093 | " __tg_pow(long double __x, long double __y) {return powl(__x, __y);}\n" |
| 48094 | "\n" |
| 48095 | "static float _Complex\n" |
| 48096 | " _TG_ATTRS\n" |
| 48097 | " __tg_pow(float _Complex __x, float _Complex __y) {return cpowf(__x, __y);}\n" |
| 48098 | "\n" |
| 48099 | "static double _Complex\n" |
| 48100 | " _TG_ATTRS\n" |
| 48101 | " __tg_pow(double _Complex __x, double _Complex __y) {return cpow(__x, __y);}\n" |
| 48102 | "\n" |
| 48103 | "static long double _Complex\n" |
| 48104 | " _TG_ATTRS\n" |
| 48105 | " __tg_pow(long double _Complex __x, long double _Complex __y)\n" |
| 48106 | " {return cpowl(__x, __y);}\n" |
| 48107 | "\n" |
| 48108 | "#undef pow\n" |
| 48109 | "#define pow(__x, __y) __tg_pow(__tg_promote2((__x), (__y))(__x), \\\n" |
| 48110 | " __tg_promote2((__x), (__y))(__y))\n" |
| 48111 | "\n" |
| 48112 | "// sqrt\n" |
| 48113 | "\n" |
| 48114 | "static float\n" |
| 48115 | " _TG_ATTRS\n" |
| 48116 | " __tg_sqrt(float __x) {return sqrtf(__x);}\n" |
| 48117 | "\n" |
| 48118 | "static double\n" |
| 48119 | " _TG_ATTRS\n" |
| 48120 | " __tg_sqrt(double __x) {return sqrt(__x);}\n" |
| 48121 | "\n" |
| 48122 | "static long double\n" |
| 48123 | " _TG_ATTRS\n" |
| 48124 | " __tg_sqrt(long double __x) {return sqrtl(__x);}\n" |
| 48125 | "\n" |
| 48126 | "static float _Complex\n" |
| 48127 | " _TG_ATTRS\n" |
| 48128 | " __tg_sqrt(float _Complex __x) {return csqrtf(__x);}\n" |
| 48129 | "\n" |
| 48130 | "static double _Complex\n" |
| 48131 | " _TG_ATTRS\n" |
| 48132 | " __tg_sqrt(double _Complex __x) {return csqrt(__x);}\n" |
| 48133 | "\n" |
| 48134 | "static long double _Complex\n" |
| 48135 | " _TG_ATTRS\n" |
| 48136 | " __tg_sqrt(long double _Complex __x) {return csqrtl(__x);}\n" |
| 48137 | "\n" |
| 48138 | "#undef sqrt\n" |
| 48139 | "#define sqrt(__x) __tg_sqrt(__tg_promote1((__x))(__x))\n" |
| 48140 | "\n" |
| 48141 | "// fabs\n" |
| 48142 | "\n" |
| 48143 | "static float\n" |
| 48144 | " _TG_ATTRS\n" |
| 48145 | " __tg_fabs(float __x) {return fabsf(__x);}\n" |
| 48146 | "\n" |
| 48147 | "static double\n" |
| 48148 | " _TG_ATTRS\n" |
| 48149 | " __tg_fabs(double __x) {return fabs(__x);}\n" |
| 48150 | "\n" |
| 48151 | "static long double\n" |
| 48152 | " _TG_ATTRS\n" |
| 48153 | " __tg_fabs(long double __x) {return fabsl(__x);}\n" |
| 48154 | "\n" |
| 48155 | "static float\n" |
| 48156 | " _TG_ATTRS\n" |
| 48157 | " __tg_fabs(float _Complex __x) {return cabsf(__x);}\n" |
| 48158 | "\n" |
| 48159 | "static double\n" |
| 48160 | " _TG_ATTRS\n" |
| 48161 | " __tg_fabs(double _Complex __x) {return cabs(__x);}\n" |
| 48162 | "\n" |
| 48163 | "static long double\n" |
| 48164 | " _TG_ATTRS\n" |
| 48165 | " __tg_fabs(long double _Complex __x) {return cabsl(__x);}\n" |
| 48166 | "\n" |
| 48167 | "#undef fabs\n" |
| 48168 | "#define fabs(__x) __tg_fabs(__tg_promote1((__x))(__x))\n" |
| 48169 | "\n" |
| 48170 | "// atan2\n" |
| 48171 | "\n" |
| 48172 | "static float\n" |
| 48173 | " _TG_ATTRS\n" |
| 48174 | " __tg_atan2(float __x, float __y) {return atan2f(__x, __y);}\n" |
| 48175 | "\n" |
| 48176 | "static double\n" |
| 48177 | " _TG_ATTRS\n" |
| 48178 | " __tg_atan2(double __x, double __y) {return atan2(__x, __y);}\n" |
| 48179 | "\n" |
| 48180 | "static long double\n" |
| 48181 | " _TG_ATTRS\n" |
| 48182 | " __tg_atan2(long double __x, long double __y) {return atan2l(__x, __y);}\n" |
| 48183 | "\n" |
| 48184 | "#undef atan2\n" |
| 48185 | "#define atan2(__x, __y) __tg_atan2(__tg_promote2((__x), (__y))(__x), \\\n" |
| 48186 | " __tg_promote2((__x), (__y))(__y))\n" |
| 48187 | "\n" |
| 48188 | "// cbrt\n" |
| 48189 | "\n" |
| 48190 | "static float\n" |
| 48191 | " _TG_ATTRS\n" |
| 48192 | " __tg_cbrt(float __x) {return cbrtf(__x);}\n" |
| 48193 | "\n" |
| 48194 | "static double\n" |
| 48195 | " _TG_ATTRS\n" |
| 48196 | " __tg_cbrt(double __x) {return cbrt(__x);}\n" |
| 48197 | "\n" |
| 48198 | "static long double\n" |
| 48199 | " _TG_ATTRS\n" |
| 48200 | " __tg_cbrt(long double __x) {return cbrtl(__x);}\n" |
| 48201 | "\n" |
| 48202 | "#undef cbrt\n" |
| 48203 | "#define cbrt(__x) __tg_cbrt(__tg_promote1((__x))(__x))\n" |
| 48204 | "\n" |
| 48205 | "// ceil\n" |
| 48206 | "\n" |
| 48207 | "static float\n" |
| 48208 | " _TG_ATTRS\n" |
| 48209 | " __tg_ceil(float __x) {return ceilf(__x);}\n" |
| 48210 | "\n" |
| 48211 | "static double\n" |
| 48212 | " _TG_ATTRS\n" |
| 48213 | " __tg_ceil(double __x) {return ceil(__x);}\n" |
| 48214 | "\n" |
| 48215 | "static long double\n" |
| 48216 | " _TG_ATTRS\n" |
| 48217 | " __tg_ceil(long double __x) {return ceill(__x);}\n" |
| 48218 | "\n" |
| 48219 | "#undef ceil\n" |
| 48220 | "#define ceil(__x) __tg_ceil(__tg_promote1((__x))(__x))\n" |
| 48221 | "\n" |
| 48222 | "// copysign\n" |
| 48223 | "\n" |
| 48224 | "static float\n" |
| 48225 | " _TG_ATTRS\n" |
| 48226 | " __tg_copysign(float __x, float __y) {return copysignf(__x, __y);}\n" |
| 48227 | "\n" |
| 48228 | "static double\n" |
| 48229 | " _TG_ATTRS\n" |
| 48230 | " __tg_copysign(double __x, double __y) {return copysign(__x, __y);}\n" |
| 48231 | "\n" |
| 48232 | "static long double\n" |
| 48233 | " _TG_ATTRS\n" |
| 48234 | " __tg_copysign(long double __x, long double __y) {return copysignl(__x, __y);}\n" |
| 48235 | "\n" |
| 48236 | "#undef copysign\n" |
| 48237 | "#define copysign(__x, __y) __tg_copysign(__tg_promote2((__x), (__y))(__x), \\\n" |
| 48238 | " __tg_promote2((__x), (__y))(__y))\n" |
| 48239 | "\n" |
| 48240 | "// erf\n" |
| 48241 | "\n" |
| 48242 | "static float\n" |
| 48243 | " _TG_ATTRS\n" |
| 48244 | " __tg_erf(float __x) {return erff(__x);}\n" |
| 48245 | "\n" |
| 48246 | "static double\n" |
| 48247 | " _TG_ATTRS\n" |
| 48248 | " __tg_erf(double __x) {return erf(__x);}\n" |
| 48249 | "\n" |
| 48250 | "static long double\n" |
| 48251 | " _TG_ATTRS\n" |
| 48252 | " __tg_erf(long double __x) {return erfl(__x);}\n" |
| 48253 | "\n" |
| 48254 | "#undef erf\n" |
| 48255 | "#define erf(__x) __tg_erf(__tg_promote1((__x))(__x))\n" |
| 48256 | "\n" |
| 48257 | "// erfc\n" |
| 48258 | "\n" |
| 48259 | "static float\n" |
| 48260 | " _TG_ATTRS\n" |
| 48261 | " __tg_erfc(float __x) {return erfcf(__x);}\n" |
| 48262 | "\n" |
| 48263 | "static double\n" |
| 48264 | " _TG_ATTRS\n" |
| 48265 | " __tg_erfc(double __x) {return erfc(__x);}\n" |
| 48266 | "\n" |
| 48267 | "static long double\n" |
| 48268 | " _TG_ATTRS\n" |
| 48269 | " __tg_erfc(long double __x) {return erfcl(__x);}\n" |
| 48270 | "\n" |
| 48271 | "#undef erfc\n" |
| 48272 | "#define erfc(__x) __tg_erfc(__tg_promote1((__x))(__x))\n" |
| 48273 | "\n" |
| 48274 | "// exp2\n" |
| 48275 | "\n" |
| 48276 | "static float\n" |
| 48277 | " _TG_ATTRS\n" |
| 48278 | " __tg_exp2(float __x) {return exp2f(__x);}\n" |
| 48279 | "\n" |
| 48280 | "static double\n" |
| 48281 | " _TG_ATTRS\n" |
| 48282 | " __tg_exp2(double __x) {return exp2(__x);}\n" |
| 48283 | "\n" |
| 48284 | "static long double\n" |
| 48285 | " _TG_ATTRS\n" |
| 48286 | " __tg_exp2(long double __x) {return exp2l(__x);}\n" |
| 48287 | "\n" |
| 48288 | "#undef exp2\n" |
| 48289 | "#define exp2(__x) __tg_exp2(__tg_promote1((__x))(__x))\n" |
| 48290 | "\n" |
| 48291 | "// expm1\n" |
| 48292 | "\n" |
| 48293 | "static float\n" |
| 48294 | " _TG_ATTRS\n" |
| 48295 | " __tg_expm1(float __x) {return expm1f(__x);}\n" |
| 48296 | "\n" |
| 48297 | "static double\n" |
| 48298 | " _TG_ATTRS\n" |
| 48299 | " __tg_expm1(double __x) {return expm1(__x);}\n" |
| 48300 | "\n" |
| 48301 | "static long double\n" |
| 48302 | " _TG_ATTRS\n" |
| 48303 | " __tg_expm1(long double __x) {return expm1l(__x);}\n" |
| 48304 | "\n" |
| 48305 | "#undef expm1\n" |
| 48306 | "#define expm1(__x) __tg_expm1(__tg_promote1((__x))(__x))\n" |
| 48307 | "\n" |
| 48308 | "// fdim\n" |
| 48309 | "\n" |
| 48310 | "static float\n" |
| 48311 | " _TG_ATTRS\n" |
| 48312 | " __tg_fdim(float __x, float __y) {return fdimf(__x, __y);}\n" |
| 48313 | "\n" |
| 48314 | "static double\n" |
| 48315 | " _TG_ATTRS\n" |
| 48316 | " __tg_fdim(double __x, double __y) {return fdim(__x, __y);}\n" |
| 48317 | "\n" |
| 48318 | "static long double\n" |
| 48319 | " _TG_ATTRS\n" |
| 48320 | " __tg_fdim(long double __x, long double __y) {return fdiml(__x, __y);}\n" |
| 48321 | "\n" |
| 48322 | "#undef fdim\n" |
| 48323 | "#define fdim(__x, __y) __tg_fdim(__tg_promote2((__x), (__y))(__x), \\\n" |
| 48324 | " __tg_promote2((__x), (__y))(__y))\n" |
| 48325 | "\n" |
| 48326 | "// floor\n" |
| 48327 | "\n" |
| 48328 | "static float\n" |
| 48329 | " _TG_ATTRS\n" |
| 48330 | " __tg_floor(float __x) {return floorf(__x);}\n" |
| 48331 | "\n" |
| 48332 | "static double\n" |
| 48333 | " _TG_ATTRS\n" |
| 48334 | " __tg_floor(double __x) {return floor(__x);}\n" |
| 48335 | "\n" |
| 48336 | "static long double\n" |
| 48337 | " _TG_ATTRS\n" |
| 48338 | " __tg_floor(long double __x) {return floorl(__x);}\n" |
| 48339 | "\n" |
| 48340 | "#undef floor\n" |
| 48341 | "#define floor(__x) __tg_floor(__tg_promote1((__x))(__x))\n" |
| 48342 | "\n" |
| 48343 | "// fma\n" |
| 48344 | "\n" |
| 48345 | "static float\n" |
| 48346 | " _TG_ATTRS\n" |
| 48347 | " __tg_fma(float __x, float __y, float __z)\n" |
| 48348 | " {return fmaf(__x, __y, __z);}\n" |
| 48349 | "\n" |
| 48350 | "static double\n" |
| 48351 | " _TG_ATTRS\n" |
| 48352 | " __tg_fma(double __x, double __y, double __z)\n" |
| 48353 | " {return fma(__x, __y, __z);}\n" |
| 48354 | "\n" |
| 48355 | "static long double\n" |
| 48356 | " _TG_ATTRS\n" |
| 48357 | " __tg_fma(long double __x,long double __y, long double __z)\n" |
| 48358 | " {return fmal(__x, __y, __z);}\n" |
| 48359 | "\n" |
| 48360 | "#undef fma\n" |
| 48361 | "#define fma(__x, __y, __z) \\\n" |
| 48362 | " __tg_fma(__tg_promote3((__x), (__y), (__z))(__x), \\\n" |
| 48363 | " __tg_promote3((__x), (__y), (__z))(__y), \\\n" |
| 48364 | " __tg_promote3((__x), (__y), (__z))(__z))\n" |
| 48365 | "\n" |
| 48366 | "// fmax\n" |
| 48367 | "\n" |
| 48368 | "static float\n" |
| 48369 | " _TG_ATTRS\n" |
| 48370 | " __tg_fmax(float __x, float __y) {return fmaxf(__x, __y);}\n" |
| 48371 | "\n" |
| 48372 | "static double\n" |
| 48373 | " _TG_ATTRS\n" |
| 48374 | " __tg_fmax(double __x, double __y) {return fmax(__x, __y);}\n" |
| 48375 | "\n" |
| 48376 | "static long double\n" |
| 48377 | " _TG_ATTRS\n" |
| 48378 | " __tg_fmax(long double __x, long double __y) {return fmaxl(__x, __y);}\n" |
| 48379 | "\n" |
| 48380 | "#undef fmax\n" |
| 48381 | "#define fmax(__x, __y) __tg_fmax(__tg_promote2((__x), (__y))(__x), \\\n" |
| 48382 | " __tg_promote2((__x), (__y))(__y))\n" |
| 48383 | "\n" |
| 48384 | "// fmin\n" |
| 48385 | "\n" |
| 48386 | "static float\n" |
| 48387 | " _TG_ATTRS\n" |
| 48388 | " __tg_fmin(float __x, float __y) {return fminf(__x, __y);}\n" |
| 48389 | "\n" |
| 48390 | "static double\n" |
| 48391 | " _TG_ATTRS\n" |
| 48392 | " __tg_fmin(double __x, double __y) {return fmin(__x, __y);}\n" |
| 48393 | "\n" |
| 48394 | "static long double\n" |
| 48395 | " _TG_ATTRS\n" |
| 48396 | " __tg_fmin(long double __x, long double __y) {return fminl(__x, __y);}\n" |
| 48397 | "\n" |
| 48398 | "#undef fmin\n" |
| 48399 | "#define fmin(__x, __y) __tg_fmin(__tg_promote2((__x), (__y))(__x), \\\n" |
| 48400 | " __tg_promote2((__x), (__y))(__y))\n" |
| 48401 | "\n" |
| 48402 | "// fmod\n" |
| 48403 | "\n" |
| 48404 | "static float\n" |
| 48405 | " _TG_ATTRS\n" |
| 48406 | " __tg_fmod(float __x, float __y) {return fmodf(__x, __y);}\n" |
| 48407 | "\n" |
| 48408 | "static double\n" |
| 48409 | " _TG_ATTRS\n" |
| 48410 | " __tg_fmod(double __x, double __y) {return fmod(__x, __y);}\n" |
| 48411 | "\n" |
| 48412 | "static long double\n" |
| 48413 | " _TG_ATTRS\n" |
| 48414 | " __tg_fmod(long double __x, long double __y) {return fmodl(__x, __y);}\n" |
| 48415 | "\n" |
| 48416 | "#undef fmod\n" |
| 48417 | "#define fmod(__x, __y) __tg_fmod(__tg_promote2((__x), (__y))(__x), \\\n" |
| 48418 | " __tg_promote2((__x), (__y))(__y))\n" |
| 48419 | "\n" |
| 48420 | "// frexp\n" |
| 48421 | "\n" |
| 48422 | "static float\n" |
| 48423 | " _TG_ATTRS\n" |
| 48424 | " __tg_frexp(float __x, int* __y) {return frexpf(__x, __y);}\n" |
| 48425 | "\n" |
| 48426 | "static double\n" |
| 48427 | " _TG_ATTRS\n" |
| 48428 | " __tg_frexp(double __x, int* __y) {return frexp(__x, __y);}\n" |
| 48429 | "\n" |
| 48430 | "static long double\n" |
| 48431 | " _TG_ATTRS\n" |
| 48432 | " __tg_frexp(long double __x, int* __y) {return frexpl(__x, __y);}\n" |
| 48433 | "\n" |
| 48434 | "#undef frexp\n" |
| 48435 | "#define frexp(__x, __y) __tg_frexp(__tg_promote1((__x))(__x), __y)\n" |
| 48436 | "\n" |
| 48437 | "// hypot\n" |
| 48438 | "\n" |
| 48439 | "static float\n" |
| 48440 | " _TG_ATTRS\n" |
| 48441 | " __tg_hypot(float __x, float __y) {return hypotf(__x, __y);}\n" |
| 48442 | "\n" |
| 48443 | "static double\n" |
| 48444 | " _TG_ATTRS\n" |
| 48445 | " __tg_hypot(double __x, double __y) {return hypot(__x, __y);}\n" |
| 48446 | "\n" |
| 48447 | "static long double\n" |
| 48448 | " _TG_ATTRS\n" |
| 48449 | " __tg_hypot(long double __x, long double __y) {return hypotl(__x, __y);}\n" |
| 48450 | "\n" |
| 48451 | "#undef hypot\n" |
| 48452 | "#define hypot(__x, __y) __tg_hypot(__tg_promote2((__x), (__y))(__x), \\\n" |
| 48453 | " __tg_promote2((__x), (__y))(__y))\n" |
| 48454 | "\n" |
| 48455 | "// ilogb\n" |
| 48456 | "\n" |
| 48457 | "static int\n" |
| 48458 | " _TG_ATTRS\n" |
| 48459 | " __tg_ilogb(float __x) {return ilogbf(__x);}\n" |
| 48460 | "\n" |
| 48461 | "static int\n" |
| 48462 | " _TG_ATTRS\n" |
| 48463 | " __tg_ilogb(double __x) {return ilogb(__x);}\n" |
| 48464 | "\n" |
| 48465 | "static int\n" |
| 48466 | " _TG_ATTRS\n" |
| 48467 | " __tg_ilogb(long double __x) {return ilogbl(__x);}\n" |
| 48468 | "\n" |
| 48469 | "#undef ilogb\n" |
| 48470 | "#define ilogb(__x) __tg_ilogb(__tg_promote1((__x))(__x))\n" |
| 48471 | "\n" |
| 48472 | "// ldexp\n" |
| 48473 | "\n" |
| 48474 | "static float\n" |
| 48475 | " _TG_ATTRS\n" |
| 48476 | " __tg_ldexp(float __x, int __y) {return ldexpf(__x, __y);}\n" |
| 48477 | "\n" |
| 48478 | "static double\n" |
| 48479 | " _TG_ATTRS\n" |
| 48480 | " __tg_ldexp(double __x, int __y) {return ldexp(__x, __y);}\n" |
| 48481 | "\n" |
| 48482 | "static long double\n" |
| 48483 | " _TG_ATTRS\n" |
| 48484 | " __tg_ldexp(long double __x, int __y) {return ldexpl(__x, __y);}\n" |
| 48485 | "\n" |
| 48486 | "#undef ldexp\n" |
| 48487 | "#define ldexp(__x, __y) __tg_ldexp(__tg_promote1((__x))(__x), __y)\n" |
| 48488 | "\n" |
| 48489 | "// lgamma\n" |
| 48490 | "\n" |
| 48491 | "static float\n" |
| 48492 | " _TG_ATTRS\n" |
| 48493 | " __tg_lgamma(float __x) {return lgammaf(__x);}\n" |
| 48494 | "\n" |
| 48495 | "static double\n" |
| 48496 | " _TG_ATTRS\n" |
| 48497 | " __tg_lgamma(double __x) {return lgamma(__x);}\n" |
| 48498 | "\n" |
| 48499 | "static long double\n" |
| 48500 | " _TG_ATTRS\n" |
| 48501 | " __tg_lgamma(long double __x) {return lgammal(__x);}\n" |
| 48502 | "\n" |
| 48503 | "#undef lgamma\n" |
| 48504 | "#define lgamma(__x) __tg_lgamma(__tg_promote1((__x))(__x))\n" |
| 48505 | "\n" |
| 48506 | "// llrint\n" |
| 48507 | "\n" |
| 48508 | "static long long\n" |
| 48509 | " _TG_ATTRS\n" |
| 48510 | " __tg_llrint(float __x) {return llrintf(__x);}\n" |
| 48511 | "\n" |
| 48512 | "static long long\n" |
| 48513 | " _TG_ATTRS\n" |
| 48514 | " __tg_llrint(double __x) {return llrint(__x);}\n" |
| 48515 | "\n" |
| 48516 | "static long long\n" |
| 48517 | " _TG_ATTRS\n" |
| 48518 | " __tg_llrint(long double __x) {return llrintl(__x);}\n" |
| 48519 | "\n" |
| 48520 | "#undef llrint\n" |
| 48521 | "#define llrint(__x) __tg_llrint(__tg_promote1((__x))(__x))\n" |
| 48522 | "\n" |
| 48523 | "// llround\n" |
| 48524 | "\n" |
| 48525 | "static long long\n" |
| 48526 | " _TG_ATTRS\n" |
| 48527 | " __tg_llround(float __x) {return llroundf(__x);}\n" |
| 48528 | "\n" |
| 48529 | "static long long\n" |
| 48530 | " _TG_ATTRS\n" |
| 48531 | " __tg_llround(double __x) {return llround(__x);}\n" |
| 48532 | "\n" |
| 48533 | "static long long\n" |
| 48534 | " _TG_ATTRS\n" |
| 48535 | " __tg_llround(long double __x) {return llroundl(__x);}\n" |
| 48536 | "\n" |
| 48537 | "#undef llround\n" |
| 48538 | "#define llround(__x) __tg_llround(__tg_promote1((__x))(__x))\n" |
| 48539 | "\n" |
| 48540 | "// log10\n" |
| 48541 | "\n" |
| 48542 | "static float\n" |
| 48543 | " _TG_ATTRS\n" |
| 48544 | " __tg_log10(float __x) {return log10f(__x);}\n" |
| 48545 | "\n" |
| 48546 | "static double\n" |
| 48547 | " _TG_ATTRS\n" |
| 48548 | " __tg_log10(double __x) {return log10(__x);}\n" |
| 48549 | "\n" |
| 48550 | "static long double\n" |
| 48551 | " _TG_ATTRS\n" |
| 48552 | " __tg_log10(long double __x) {return log10l(__x);}\n" |
| 48553 | "\n" |
| 48554 | "#undef log10\n" |
| 48555 | "#define log10(__x) __tg_log10(__tg_promote1((__x))(__x))\n" |
| 48556 | "\n" |
| 48557 | "// log1p\n" |
| 48558 | "\n" |
| 48559 | "static float\n" |
| 48560 | " _TG_ATTRS\n" |
| 48561 | " __tg_log1p(float __x) {return log1pf(__x);}\n" |
| 48562 | "\n" |
| 48563 | "static double\n" |
| 48564 | " _TG_ATTRS\n" |
| 48565 | " __tg_log1p(double __x) {return log1p(__x);}\n" |
| 48566 | "\n" |
| 48567 | "static long double\n" |
| 48568 | " _TG_ATTRS\n" |
| 48569 | " __tg_log1p(long double __x) {return log1pl(__x);}\n" |
| 48570 | "\n" |
| 48571 | "#undef log1p\n" |
| 48572 | "#define log1p(__x) __tg_log1p(__tg_promote1((__x))(__x))\n" |
| 48573 | "\n" |
| 48574 | "// log2\n" |
| 48575 | "\n" |
| 48576 | "static float\n" |
| 48577 | " _TG_ATTRS\n" |
| 48578 | " __tg_log2(float __x) {return log2f(__x);}\n" |
| 48579 | "\n" |
| 48580 | "static double\n" |
| 48581 | " _TG_ATTRS\n" |
| 48582 | " __tg_log2(double __x) {return log2(__x);}\n" |
| 48583 | "\n" |
| 48584 | "static long double\n" |
| 48585 | " _TG_ATTRS\n" |
| 48586 | " __tg_log2(long double __x) {return log2l(__x);}\n" |
| 48587 | "\n" |
| 48588 | "#undef log2\n" |
| 48589 | "#define log2(__x) __tg_log2(__tg_promote1((__x))(__x))\n" |
| 48590 | "\n" |
| 48591 | "// logb\n" |
| 48592 | "\n" |
| 48593 | "static float\n" |
| 48594 | " _TG_ATTRS\n" |
| 48595 | " __tg_logb(float __x) {return logbf(__x);}\n" |
| 48596 | "\n" |
| 48597 | "static double\n" |
| 48598 | " _TG_ATTRS\n" |
| 48599 | " __tg_logb(double __x) {return logb(__x);}\n" |
| 48600 | "\n" |
| 48601 | "static long double\n" |
| 48602 | " _TG_ATTRS\n" |
| 48603 | " __tg_logb(long double __x) {return logbl(__x);}\n" |
| 48604 | "\n" |
| 48605 | "#undef logb\n" |
| 48606 | "#define logb(__x) __tg_logb(__tg_promote1((__x))(__x))\n" |
| 48607 | "\n" |
| 48608 | "// lrint\n" |
| 48609 | "\n" |
| 48610 | "static long\n" |
| 48611 | " _TG_ATTRS\n" |
| 48612 | " __tg_lrint(float __x) {return lrintf(__x);}\n" |
| 48613 | "\n" |
| 48614 | "static long\n" |
| 48615 | " _TG_ATTRS\n" |
| 48616 | " __tg_lrint(double __x) {return lrint(__x);}\n" |
| 48617 | "\n" |
| 48618 | "static long\n" |
| 48619 | " _TG_ATTRS\n" |
| 48620 | " __tg_lrint(long double __x) {return lrintl(__x);}\n" |
| 48621 | "\n" |
| 48622 | "#undef lrint\n" |
| 48623 | "#define lrint(__x) __tg_lrint(__tg_promote1((__x))(__x))\n" |
| 48624 | "\n" |
| 48625 | "// lround\n" |
| 48626 | "\n" |
| 48627 | "static long\n" |
| 48628 | " _TG_ATTRS\n" |
| 48629 | " __tg_lround(float __x) {return lroundf(__x);}\n" |
| 48630 | "\n" |
| 48631 | "static long\n" |
| 48632 | " _TG_ATTRS\n" |
| 48633 | " __tg_lround(double __x) {return lround(__x);}\n" |
| 48634 | "\n" |
| 48635 | "static long\n" |
| 48636 | " _TG_ATTRS\n" |
| 48637 | " __tg_lround(long double __x) {return lroundl(__x);}\n" |
| 48638 | "\n" |
| 48639 | "#undef lround\n" |
| 48640 | "#define lround(__x) __tg_lround(__tg_promote1((__x))(__x))\n" |
| 48641 | "\n" |
| 48642 | "// nearbyint\n" |
| 48643 | "\n" |
| 48644 | "static float\n" |
| 48645 | " _TG_ATTRS\n" |
| 48646 | " __tg_nearbyint(float __x) {return nearbyintf(__x);}\n" |
| 48647 | "\n" |
| 48648 | "static double\n" |
| 48649 | " _TG_ATTRS\n" |
| 48650 | " __tg_nearbyint(double __x) {return nearbyint(__x);}\n" |
| 48651 | "\n" |
| 48652 | "static long double\n" |
| 48653 | " _TG_ATTRS\n" |
| 48654 | " __tg_nearbyint(long double __x) {return nearbyintl(__x);}\n" |
| 48655 | "\n" |
| 48656 | "#undef nearbyint\n" |
| 48657 | "#define nearbyint(__x) __tg_nearbyint(__tg_promote1((__x))(__x))\n" |
| 48658 | "\n" |
| 48659 | "// nextafter\n" |
| 48660 | "\n" |
| 48661 | "static float\n" |
| 48662 | " _TG_ATTRS\n" |
| 48663 | " __tg_nextafter(float __x, float __y) {return nextafterf(__x, __y);}\n" |
| 48664 | "\n" |
| 48665 | "static double\n" |
| 48666 | " _TG_ATTRS\n" |
| 48667 | " __tg_nextafter(double __x, double __y) {return nextafter(__x, __y);}\n" |
| 48668 | "\n" |
| 48669 | "static long double\n" |
| 48670 | " _TG_ATTRS\n" |
| 48671 | " __tg_nextafter(long double __x, long double __y) {return nextafterl(__x, __y);}\n" |
| 48672 | "\n" |
| 48673 | "#undef nextafter\n" |
| 48674 | "#define nextafter(__x, __y) __tg_nextafter(__tg_promote2((__x), (__y))(__x), \\\n" |
| 48675 | " __tg_promote2((__x), (__y))(__y))\n" |
| 48676 | "\n" |
| 48677 | "// nexttoward\n" |
| 48678 | "\n" |
| 48679 | "static float\n" |
| 48680 | " _TG_ATTRS\n" |
| 48681 | " __tg_nexttoward(float __x, long double __y) {return nexttowardf(__x, __y);}\n" |
| 48682 | "\n" |
| 48683 | "static double\n" |
| 48684 | " _TG_ATTRS\n" |
| 48685 | " __tg_nexttoward(double __x, long double __y) {return nexttoward(__x, __y);}\n" |
| 48686 | "\n" |
| 48687 | "static long double\n" |
| 48688 | " _TG_ATTRS\n" |
| 48689 | " __tg_nexttoward(long double __x, long double __y) {return nexttowardl(__x, __y);}\n" |
| 48690 | "\n" |
| 48691 | "#undef nexttoward\n" |
| 48692 | "#define nexttoward(__x, __y) __tg_nexttoward(__tg_promote1((__x))(__x), (__y))\n" |
| 48693 | "\n" |
| 48694 | "// remainder\n" |
| 48695 | "\n" |
| 48696 | "static float\n" |
| 48697 | " _TG_ATTRS\n" |
| 48698 | " __tg_remainder(float __x, float __y) {return remainderf(__x, __y);}\n" |
| 48699 | "\n" |
| 48700 | "static double\n" |
| 48701 | " _TG_ATTRS\n" |
| 48702 | " __tg_remainder(double __x, double __y) {return remainder(__x, __y);}\n" |
| 48703 | "\n" |
| 48704 | "static long double\n" |
| 48705 | " _TG_ATTRS\n" |
| 48706 | " __tg_remainder(long double __x, long double __y) {return remainderl(__x, __y);}\n" |
| 48707 | "\n" |
| 48708 | "#undef remainder\n" |
| 48709 | "#define remainder(__x, __y) __tg_remainder(__tg_promote2((__x), (__y))(__x), \\\n" |
| 48710 | " __tg_promote2((__x), (__y))(__y))\n" |
| 48711 | "\n" |
| 48712 | "// remquo\n" |
| 48713 | "\n" |
| 48714 | "static float\n" |
| 48715 | " _TG_ATTRS\n" |
| 48716 | " __tg_remquo(float __x, float __y, int* __z)\n" |
| 48717 | " {return remquof(__x, __y, __z);}\n" |
| 48718 | "\n" |
| 48719 | "static double\n" |
| 48720 | " _TG_ATTRS\n" |
| 48721 | " __tg_remquo(double __x, double __y, int* __z)\n" |
| 48722 | " {return remquo(__x, __y, __z);}\n" |
| 48723 | "\n" |
| 48724 | "static long double\n" |
| 48725 | " _TG_ATTRS\n" |
| 48726 | " __tg_remquo(long double __x,long double __y, int* __z)\n" |
| 48727 | " {return remquol(__x, __y, __z);}\n" |
| 48728 | "\n" |
| 48729 | "#undef remquo\n" |
| 48730 | "#define remquo(__x, __y, __z) \\\n" |
| 48731 | " __tg_remquo(__tg_promote2((__x), (__y))(__x), \\\n" |
| 48732 | " __tg_promote2((__x), (__y))(__y), \\\n" |
| 48733 | " (__z))\n" |
| 48734 | "\n" |
| 48735 | "// rint\n" |
| 48736 | "\n" |
| 48737 | "static float\n" |
| 48738 | " _TG_ATTRS\n" |
| 48739 | " __tg_rint(float __x) {return rintf(__x);}\n" |
| 48740 | "\n" |
| 48741 | "static double\n" |
| 48742 | " _TG_ATTRS\n" |
| 48743 | " __tg_rint(double __x) {return rint(__x);}\n" |
| 48744 | "\n" |
| 48745 | "static long double\n" |
| 48746 | " _TG_ATTRS\n" |
| 48747 | " __tg_rint(long double __x) {return rintl(__x);}\n" |
| 48748 | "\n" |
| 48749 | "#undef rint\n" |
| 48750 | "#define rint(__x) __tg_rint(__tg_promote1((__x))(__x))\n" |
| 48751 | "\n" |
| 48752 | "// round\n" |
| 48753 | "\n" |
| 48754 | "static float\n" |
| 48755 | " _TG_ATTRS\n" |
| 48756 | " __tg_round(float __x) {return roundf(__x);}\n" |
| 48757 | "\n" |
| 48758 | "static double\n" |
| 48759 | " _TG_ATTRS\n" |
| 48760 | " __tg_round(double __x) {return round(__x);}\n" |
| 48761 | "\n" |
| 48762 | "static long double\n" |
| 48763 | " _TG_ATTRS\n" |
| 48764 | " __tg_round(long double __x) {return roundl(__x);}\n" |
| 48765 | "\n" |
| 48766 | "#undef round\n" |
| 48767 | "#define round(__x) __tg_round(__tg_promote1((__x))(__x))\n" |
| 48768 | "\n" |
| 48769 | "// scalbn\n" |
| 48770 | "\n" |
| 48771 | "static float\n" |
| 48772 | " _TG_ATTRS\n" |
| 48773 | " __tg_scalbn(float __x, int __y) {return scalbnf(__x, __y);}\n" |
| 48774 | "\n" |
| 48775 | "static double\n" |
| 48776 | " _TG_ATTRS\n" |
| 48777 | " __tg_scalbn(double __x, int __y) {return scalbn(__x, __y);}\n" |
| 48778 | "\n" |
| 48779 | "static long double\n" |
| 48780 | " _TG_ATTRS\n" |
| 48781 | " __tg_scalbn(long double __x, int __y) {return scalbnl(__x, __y);}\n" |
| 48782 | "\n" |
| 48783 | "#undef scalbn\n" |
| 48784 | "#define scalbn(__x, __y) __tg_scalbn(__tg_promote1((__x))(__x), __y)\n" |
| 48785 | "\n" |
| 48786 | "// scalbln\n" |
| 48787 | "\n" |
| 48788 | "static float\n" |
| 48789 | " _TG_ATTRS\n" |
| 48790 | " __tg_scalbln(float __x, long __y) {return scalblnf(__x, __y);}\n" |
| 48791 | "\n" |
| 48792 | "static double\n" |
| 48793 | " _TG_ATTRS\n" |
| 48794 | " __tg_scalbln(double __x, long __y) {return scalbln(__x, __y);}\n" |
| 48795 | "\n" |
| 48796 | "static long double\n" |
| 48797 | " _TG_ATTRS\n" |
| 48798 | " __tg_scalbln(long double __x, long __y) {return scalblnl(__x, __y);}\n" |
| 48799 | "\n" |
| 48800 | "#undef scalbln\n" |
| 48801 | "#define scalbln(__x, __y) __tg_scalbln(__tg_promote1((__x))(__x), __y)\n" |
| 48802 | "\n" |
| 48803 | "// tgamma\n" |
| 48804 | "\n" |
| 48805 | "static float\n" |
| 48806 | " _TG_ATTRS\n" |
| 48807 | " __tg_tgamma(float __x) {return tgammaf(__x);}\n" |
| 48808 | "\n" |
| 48809 | "static double\n" |
| 48810 | " _TG_ATTRS\n" |
| 48811 | " __tg_tgamma(double __x) {return tgamma(__x);}\n" |
| 48812 | "\n" |
| 48813 | "static long double\n" |
| 48814 | " _TG_ATTRS\n" |
| 48815 | " __tg_tgamma(long double __x) {return tgammal(__x);}\n" |
| 48816 | "\n" |
| 48817 | "#undef tgamma\n" |
| 48818 | "#define tgamma(__x) __tg_tgamma(__tg_promote1((__x))(__x))\n" |
| 48819 | "\n" |
| 48820 | "// trunc\n" |
| 48821 | "\n" |
| 48822 | "static float\n" |
| 48823 | " _TG_ATTRS\n" |
| 48824 | " __tg_trunc(float __x) {return truncf(__x);}\n" |
| 48825 | "\n" |
| 48826 | "static double\n" |
| 48827 | " _TG_ATTRS\n" |
| 48828 | " __tg_trunc(double __x) {return trunc(__x);}\n" |
| 48829 | "\n" |
| 48830 | "static long double\n" |
| 48831 | " _TG_ATTRS\n" |
| 48832 | " __tg_trunc(long double __x) {return truncl(__x);}\n" |
| 48833 | "\n" |
| 48834 | "#undef trunc\n" |
| 48835 | "#define trunc(__x) __tg_trunc(__tg_promote1((__x))(__x))\n" |
| 48836 | "\n" |
| 48837 | "// carg\n" |
| 48838 | "\n" |
| 48839 | "static float\n" |
| 48840 | " _TG_ATTRS\n" |
| 48841 | " __tg_carg(float __x) {return atan2f(0.F, __x);}\n" |
| 48842 | "\n" |
| 48843 | "static double\n" |
| 48844 | " _TG_ATTRS\n" |
| 48845 | " __tg_carg(double __x) {return atan2(0., __x);}\n" |
| 48846 | "\n" |
| 48847 | "static long double\n" |
| 48848 | " _TG_ATTRS\n" |
| 48849 | " __tg_carg(long double __x) {return atan2l(0.L, __x);}\n" |
| 48850 | "\n" |
| 48851 | "static float\n" |
| 48852 | " _TG_ATTRS\n" |
| 48853 | " __tg_carg(float _Complex __x) {return cargf(__x);}\n" |
| 48854 | "\n" |
| 48855 | "static double\n" |
| 48856 | " _TG_ATTRS\n" |
| 48857 | " __tg_carg(double _Complex __x) {return carg(__x);}\n" |
| 48858 | "\n" |
| 48859 | "static long double\n" |
| 48860 | " _TG_ATTRS\n" |
| 48861 | " __tg_carg(long double _Complex __x) {return cargl(__x);}\n" |
| 48862 | "\n" |
| 48863 | "#undef carg\n" |
| 48864 | "#define carg(__x) __tg_carg(__tg_promote1((__x))(__x))\n" |
| 48865 | "\n" |
| 48866 | "// cimag\n" |
| 48867 | "\n" |
| 48868 | "static float\n" |
| 48869 | " _TG_ATTRS\n" |
| 48870 | " __tg_cimag(float __x) {return 0;}\n" |
| 48871 | "\n" |
| 48872 | "static double\n" |
| 48873 | " _TG_ATTRS\n" |
| 48874 | " __tg_cimag(double __x) {return 0;}\n" |
| 48875 | "\n" |
| 48876 | "static long double\n" |
| 48877 | " _TG_ATTRS\n" |
| 48878 | " __tg_cimag(long double __x) {return 0;}\n" |
| 48879 | "\n" |
| 48880 | "static float\n" |
| 48881 | " _TG_ATTRS\n" |
| 48882 | " __tg_cimag(float _Complex __x) {return cimagf(__x);}\n" |
| 48883 | "\n" |
| 48884 | "static double\n" |
| 48885 | " _TG_ATTRS\n" |
| 48886 | " __tg_cimag(double _Complex __x) {return cimag(__x);}\n" |
| 48887 | "\n" |
| 48888 | "static long double\n" |
| 48889 | " _TG_ATTRS\n" |
| 48890 | " __tg_cimag(long double _Complex __x) {return cimagl(__x);}\n" |
| 48891 | "\n" |
| 48892 | "#undef cimag\n" |
| 48893 | "#define cimag(__x) __tg_cimag(__tg_promote1((__x))(__x))\n" |
| 48894 | "\n" |
| 48895 | "// conj\n" |
| 48896 | "\n" |
| 48897 | "static float _Complex\n" |
| 48898 | " _TG_ATTRS\n" |
| 48899 | " __tg_conj(float __x) {return __x;}\n" |
| 48900 | "\n" |
| 48901 | "static double _Complex\n" |
| 48902 | " _TG_ATTRS\n" |
| 48903 | " __tg_conj(double __x) {return __x;}\n" |
| 48904 | "\n" |
| 48905 | "static long double _Complex\n" |
| 48906 | " _TG_ATTRS\n" |
| 48907 | " __tg_conj(long double __x) {return __x;}\n" |
| 48908 | "\n" |
| 48909 | "static float _Complex\n" |
| 48910 | " _TG_ATTRS\n" |
| 48911 | " __tg_conj(float _Complex __x) {return conjf(__x);}\n" |
| 48912 | "\n" |
| 48913 | "static double _Complex\n" |
| 48914 | " _TG_ATTRS\n" |
| 48915 | " __tg_conj(double _Complex __x) {return conj(__x);}\n" |
| 48916 | "\n" |
| 48917 | "static long double _Complex\n" |
| 48918 | " _TG_ATTRS\n" |
| 48919 | " __tg_conj(long double _Complex __x) {return conjl(__x);}\n" |
| 48920 | "\n" |
| 48921 | "#undef conj\n" |
| 48922 | "#define conj(__x) __tg_conj(__tg_promote1((__x))(__x))\n" |
| 48923 | "\n" |
| 48924 | "// cproj\n" |
| 48925 | "\n" |
| 48926 | "static float _Complex\n" |
| 48927 | " _TG_ATTRS\n" |
| 48928 | " __tg_cproj(float __x) {return cprojf(__x);}\n" |
| 48929 | "\n" |
| 48930 | "static double _Complex\n" |
| 48931 | " _TG_ATTRS\n" |
| 48932 | " __tg_cproj(double __x) {return cproj(__x);}\n" |
| 48933 | "\n" |
| 48934 | "static long double _Complex\n" |
| 48935 | " _TG_ATTRS\n" |
| 48936 | " __tg_cproj(long double __x) {return cprojl(__x);}\n" |
| 48937 | "\n" |
| 48938 | "static float _Complex\n" |
| 48939 | " _TG_ATTRS\n" |
| 48940 | " __tg_cproj(float _Complex __x) {return cprojf(__x);}\n" |
| 48941 | "\n" |
| 48942 | "static double _Complex\n" |
| 48943 | " _TG_ATTRS\n" |
| 48944 | " __tg_cproj(double _Complex __x) {return cproj(__x);}\n" |
| 48945 | "\n" |
| 48946 | "static long double _Complex\n" |
| 48947 | " _TG_ATTRS\n" |
| 48948 | " __tg_cproj(long double _Complex __x) {return cprojl(__x);}\n" |
| 48949 | "\n" |
| 48950 | "#undef cproj\n" |
| 48951 | "#define cproj(__x) __tg_cproj(__tg_promote1((__x))(__x))\n" |
| 48952 | "\n" |
| 48953 | "// creal\n" |
| 48954 | "\n" |
| 48955 | "static float\n" |
| 48956 | " _TG_ATTRS\n" |
| 48957 | " __tg_creal(float __x) {return __x;}\n" |
| 48958 | "\n" |
| 48959 | "static double\n" |
| 48960 | " _TG_ATTRS\n" |
| 48961 | " __tg_creal(double __x) {return __x;}\n" |
| 48962 | "\n" |
| 48963 | "static long double\n" |
| 48964 | " _TG_ATTRS\n" |
| 48965 | " __tg_creal(long double __x) {return __x;}\n" |
| 48966 | "\n" |
| 48967 | "static float\n" |
| 48968 | " _TG_ATTRS\n" |
| 48969 | " __tg_creal(float _Complex __x) {return crealf(__x);}\n" |
| 48970 | "\n" |
| 48971 | "static double\n" |
| 48972 | " _TG_ATTRS\n" |
| 48973 | " __tg_creal(double _Complex __x) {return creal(__x);}\n" |
| 48974 | "\n" |
| 48975 | "static long double\n" |
| 48976 | " _TG_ATTRS\n" |
| 48977 | " __tg_creal(long double _Complex __x) {return creall(__x);}\n" |
| 48978 | "\n" |
| 48979 | "#undef creal\n" |
| 48980 | "#define creal(__x) __tg_creal(__tg_promote1((__x))(__x))\n" |
| 48981 | "\n" |
| 48982 | "#undef _TG_ATTRSp\n" |
| 48983 | "#undef _TG_ATTRS\n" |
| 48984 | "\n" |
| 48985 | "#endif /* __cplusplus */\n" |
| 48986 | "#endif /* __has_include_next */\n" |
| 48987 | "#endif /* __CLANG_TGMATH_H */\n" |
| 48988 | "" } , |
| 48989 | { "/builtins/tmmintrin.h" , "/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===\n" |
| 48990 | " *\n" |
| 48991 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 48992 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 48993 | " * in the Software without restriction, including without limitation the rights\n" |
| 48994 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 48995 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 48996 | " * furnished to do so, subject to the following conditions:\n" |
| 48997 | " *\n" |
| 48998 | " * The above copyright notice and this permission notice shall be included in\n" |
| 48999 | " * all copies or substantial portions of the Software.\n" |
| 49000 | " *\n" |
| 49001 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 49002 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 49003 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 49004 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 49005 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 49006 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 49007 | " * THE SOFTWARE.\n" |
| 49008 | " *\n" |
| 49009 | " *===-----------------------------------------------------------------------===\n" |
| 49010 | " */\n" |
| 49011 | "\n" |
| 49012 | "#ifndef __TMMINTRIN_H\n" |
| 49013 | "#define __TMMINTRIN_H\n" |
| 49014 | "\n" |
| 49015 | "#include <pmmintrin.h>\n" |
| 49016 | "\n" |
| 49017 | "/* Define the default attributes for the functions in this file. */\n" |
| 49018 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"ssse3\"), __min_vector_width__(64)))\n" |
| 49019 | "#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__(\"mmx,ssse3\"), __min_vector_width__(64)))\n" |
| 49020 | "\n" |
| 49021 | "/// Computes the absolute value of each of the packed 8-bit signed\n" |
| 49022 | "/// integers in the source operand and stores the 8-bit unsigned integer\n" |
| 49023 | "/// results in the destination.\n" |
| 49024 | "///\n" |
| 49025 | "/// \\headerfile <x86intrin.h>\n" |
| 49026 | "///\n" |
| 49027 | "/// This intrinsic corresponds to the \\c PABSB instruction.\n" |
| 49028 | "///\n" |
| 49029 | "/// \\param __a\n" |
| 49030 | "/// A 64-bit vector of [8 x i8].\n" |
| 49031 | "/// \\returns A 64-bit integer vector containing the absolute values of the\n" |
| 49032 | "/// elements in the operand.\n" |
| 49033 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 49034 | "_mm_abs_pi8(__m64 __a)\n" |
| 49035 | "{\n" |
| 49036 | " return (__m64)__builtin_ia32_pabsb((__v8qi)__a);\n" |
| 49037 | "}\n" |
| 49038 | "\n" |
| 49039 | "/// Computes the absolute value of each of the packed 8-bit signed\n" |
| 49040 | "/// integers in the source operand and stores the 8-bit unsigned integer\n" |
| 49041 | "/// results in the destination.\n" |
| 49042 | "///\n" |
| 49043 | "/// \\headerfile <x86intrin.h>\n" |
| 49044 | "///\n" |
| 49045 | "/// This intrinsic corresponds to the \\c VPABSB instruction.\n" |
| 49046 | "///\n" |
| 49047 | "/// \\param __a\n" |
| 49048 | "/// A 128-bit vector of [16 x i8].\n" |
| 49049 | "/// \\returns A 128-bit integer vector containing the absolute values of the\n" |
| 49050 | "/// elements in the operand.\n" |
| 49051 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 49052 | "_mm_abs_epi8(__m128i __a)\n" |
| 49053 | "{\n" |
| 49054 | " return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);\n" |
| 49055 | "}\n" |
| 49056 | "\n" |
| 49057 | "/// Computes the absolute value of each of the packed 16-bit signed\n" |
| 49058 | "/// integers in the source operand and stores the 16-bit unsigned integer\n" |
| 49059 | "/// results in the destination.\n" |
| 49060 | "///\n" |
| 49061 | "/// \\headerfile <x86intrin.h>\n" |
| 49062 | "///\n" |
| 49063 | "/// This intrinsic corresponds to the \\c PABSW instruction.\n" |
| 49064 | "///\n" |
| 49065 | "/// \\param __a\n" |
| 49066 | "/// A 64-bit vector of [4 x i16].\n" |
| 49067 | "/// \\returns A 64-bit integer vector containing the absolute values of the\n" |
| 49068 | "/// elements in the operand.\n" |
| 49069 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 49070 | "_mm_abs_pi16(__m64 __a)\n" |
| 49071 | "{\n" |
| 49072 | " return (__m64)__builtin_ia32_pabsw((__v4hi)__a);\n" |
| 49073 | "}\n" |
| 49074 | "\n" |
| 49075 | "/// Computes the absolute value of each of the packed 16-bit signed\n" |
| 49076 | "/// integers in the source operand and stores the 16-bit unsigned integer\n" |
| 49077 | "/// results in the destination.\n" |
| 49078 | "///\n" |
| 49079 | "/// \\headerfile <x86intrin.h>\n" |
| 49080 | "///\n" |
| 49081 | "/// This intrinsic corresponds to the \\c VPABSW instruction.\n" |
| 49082 | "///\n" |
| 49083 | "/// \\param __a\n" |
| 49084 | "/// A 128-bit vector of [8 x i16].\n" |
| 49085 | "/// \\returns A 128-bit integer vector containing the absolute values of the\n" |
| 49086 | "/// elements in the operand.\n" |
| 49087 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 49088 | "_mm_abs_epi16(__m128i __a)\n" |
| 49089 | "{\n" |
| 49090 | " return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);\n" |
| 49091 | "}\n" |
| 49092 | "\n" |
| 49093 | "/// Computes the absolute value of each of the packed 32-bit signed\n" |
| 49094 | "/// integers in the source operand and stores the 32-bit unsigned integer\n" |
| 49095 | "/// results in the destination.\n" |
| 49096 | "///\n" |
| 49097 | "/// \\headerfile <x86intrin.h>\n" |
| 49098 | "///\n" |
| 49099 | "/// This intrinsic corresponds to the \\c PABSD instruction.\n" |
| 49100 | "///\n" |
| 49101 | "/// \\param __a\n" |
| 49102 | "/// A 64-bit vector of [2 x i32].\n" |
| 49103 | "/// \\returns A 64-bit integer vector containing the absolute values of the\n" |
| 49104 | "/// elements in the operand.\n" |
| 49105 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 49106 | "_mm_abs_pi32(__m64 __a)\n" |
| 49107 | "{\n" |
| 49108 | " return (__m64)__builtin_ia32_pabsd((__v2si)__a);\n" |
| 49109 | "}\n" |
| 49110 | "\n" |
| 49111 | "/// Computes the absolute value of each of the packed 32-bit signed\n" |
| 49112 | "/// integers in the source operand and stores the 32-bit unsigned integer\n" |
| 49113 | "/// results in the destination.\n" |
| 49114 | "///\n" |
| 49115 | "/// \\headerfile <x86intrin.h>\n" |
| 49116 | "///\n" |
| 49117 | "/// This intrinsic corresponds to the \\c VPABSD instruction.\n" |
| 49118 | "///\n" |
| 49119 | "/// \\param __a\n" |
| 49120 | "/// A 128-bit vector of [4 x i32].\n" |
| 49121 | "/// \\returns A 128-bit integer vector containing the absolute values of the\n" |
| 49122 | "/// elements in the operand.\n" |
| 49123 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 49124 | "_mm_abs_epi32(__m128i __a)\n" |
| 49125 | "{\n" |
| 49126 | " return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);\n" |
| 49127 | "}\n" |
| 49128 | "\n" |
| 49129 | "/// Concatenates the two 128-bit integer vector operands, and\n" |
| 49130 | "/// right-shifts the result by the number of bytes specified in the immediate\n" |
| 49131 | "/// operand.\n" |
| 49132 | "///\n" |
| 49133 | "/// \\headerfile <x86intrin.h>\n" |
| 49134 | "///\n" |
| 49135 | "/// \\code\n" |
| 49136 | "/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);\n" |
| 49137 | "/// \\endcode\n" |
| 49138 | "///\n" |
| 49139 | "/// This intrinsic corresponds to the \\c PALIGNR instruction.\n" |
| 49140 | "///\n" |
| 49141 | "/// \\param a\n" |
| 49142 | "/// A 128-bit vector of [16 x i8] containing one of the source operands.\n" |
| 49143 | "/// \\param b\n" |
| 49144 | "/// A 128-bit vector of [16 x i8] containing one of the source operands.\n" |
| 49145 | "/// \\param n\n" |
| 49146 | "/// An immediate operand specifying how many bytes to right-shift the result.\n" |
| 49147 | "/// \\returns A 128-bit integer vector containing the concatenated right-shifted\n" |
| 49148 | "/// value.\n" |
| 49149 | "#define _mm_alignr_epi8(a, b, n) \\\n" |
| 49150 | " (__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \\\n" |
| 49151 | " (__v16qi)(__m128i)(b), (n))\n" |
| 49152 | "\n" |
| 49153 | "/// Concatenates the two 64-bit integer vector operands, and right-shifts\n" |
| 49154 | "/// the result by the number of bytes specified in the immediate operand.\n" |
| 49155 | "///\n" |
| 49156 | "/// \\headerfile <x86intrin.h>\n" |
| 49157 | "///\n" |
| 49158 | "/// \\code\n" |
| 49159 | "/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);\n" |
| 49160 | "/// \\endcode\n" |
| 49161 | "///\n" |
| 49162 | "/// This intrinsic corresponds to the \\c PALIGNR instruction.\n" |
| 49163 | "///\n" |
| 49164 | "/// \\param a\n" |
| 49165 | "/// A 64-bit vector of [8 x i8] containing one of the source operands.\n" |
| 49166 | "/// \\param b\n" |
| 49167 | "/// A 64-bit vector of [8 x i8] containing one of the source operands.\n" |
| 49168 | "/// \\param n\n" |
| 49169 | "/// An immediate operand specifying how many bytes to right-shift the result.\n" |
| 49170 | "/// \\returns A 64-bit integer vector containing the concatenated right-shifted\n" |
| 49171 | "/// value.\n" |
| 49172 | "#define _mm_alignr_pi8(a, b, n) \\\n" |
| 49173 | " (__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n))\n" |
| 49174 | "\n" |
| 49175 | "/// Horizontally adds the adjacent pairs of values contained in 2 packed\n" |
| 49176 | "/// 128-bit vectors of [8 x i16].\n" |
| 49177 | "///\n" |
| 49178 | "/// \\headerfile <x86intrin.h>\n" |
| 49179 | "///\n" |
| 49180 | "/// This intrinsic corresponds to the \\c VPHADDW instruction.\n" |
| 49181 | "///\n" |
| 49182 | "/// \\param __a\n" |
| 49183 | "/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n" |
| 49184 | "/// horizontal sums of the values are stored in the lower bits of the\n" |
| 49185 | "/// destination.\n" |
| 49186 | "/// \\param __b\n" |
| 49187 | "/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n" |
| 49188 | "/// horizontal sums of the values are stored in the upper bits of the\n" |
| 49189 | "/// destination.\n" |
| 49190 | "/// \\returns A 128-bit vector of [8 x i16] containing the horizontal sums of\n" |
| 49191 | "/// both operands.\n" |
| 49192 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 49193 | "_mm_hadd_epi16(__m128i __a, __m128i __b)\n" |
| 49194 | "{\n" |
| 49195 | " return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);\n" |
| 49196 | "}\n" |
| 49197 | "\n" |
| 49198 | "/// Horizontally adds the adjacent pairs of values contained in 2 packed\n" |
| 49199 | "/// 128-bit vectors of [4 x i32].\n" |
| 49200 | "///\n" |
| 49201 | "/// \\headerfile <x86intrin.h>\n" |
| 49202 | "///\n" |
| 49203 | "/// This intrinsic corresponds to the \\c VPHADDD instruction.\n" |
| 49204 | "///\n" |
| 49205 | "/// \\param __a\n" |
| 49206 | "/// A 128-bit vector of [4 x i32] containing one of the source operands. The\n" |
| 49207 | "/// horizontal sums of the values are stored in the lower bits of the\n" |
| 49208 | "/// destination.\n" |
| 49209 | "/// \\param __b\n" |
| 49210 | "/// A 128-bit vector of [4 x i32] containing one of the source operands. The\n" |
| 49211 | "/// horizontal sums of the values are stored in the upper bits of the\n" |
| 49212 | "/// destination.\n" |
| 49213 | "/// \\returns A 128-bit vector of [4 x i32] containing the horizontal sums of\n" |
| 49214 | "/// both operands.\n" |
| 49215 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 49216 | "_mm_hadd_epi32(__m128i __a, __m128i __b)\n" |
| 49217 | "{\n" |
| 49218 | " return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);\n" |
| 49219 | "}\n" |
| 49220 | "\n" |
| 49221 | "/// Horizontally adds the adjacent pairs of values contained in 2 packed\n" |
| 49222 | "/// 64-bit vectors of [4 x i16].\n" |
| 49223 | "///\n" |
| 49224 | "/// \\headerfile <x86intrin.h>\n" |
| 49225 | "///\n" |
| 49226 | "/// This intrinsic corresponds to the \\c PHADDW instruction.\n" |
| 49227 | "///\n" |
| 49228 | "/// \\param __a\n" |
| 49229 | "/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n" |
| 49230 | "/// horizontal sums of the values are stored in the lower bits of the\n" |
| 49231 | "/// destination.\n" |
| 49232 | "/// \\param __b\n" |
| 49233 | "/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n" |
| 49234 | "/// horizontal sums of the values are stored in the upper bits of the\n" |
| 49235 | "/// destination.\n" |
| 49236 | "/// \\returns A 64-bit vector of [4 x i16] containing the horizontal sums of both\n" |
| 49237 | "/// operands.\n" |
| 49238 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 49239 | "_mm_hadd_pi16(__m64 __a, __m64 __b)\n" |
| 49240 | "{\n" |
| 49241 | " return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);\n" |
| 49242 | "}\n" |
| 49243 | "\n" |
| 49244 | "/// Horizontally adds the adjacent pairs of values contained in 2 packed\n" |
| 49245 | "/// 64-bit vectors of [2 x i32].\n" |
| 49246 | "///\n" |
| 49247 | "/// \\headerfile <x86intrin.h>\n" |
| 49248 | "///\n" |
| 49249 | "/// This intrinsic corresponds to the \\c PHADDD instruction.\n" |
| 49250 | "///\n" |
| 49251 | "/// \\param __a\n" |
| 49252 | "/// A 64-bit vector of [2 x i32] containing one of the source operands. The\n" |
| 49253 | "/// horizontal sums of the values are stored in the lower bits of the\n" |
| 49254 | "/// destination.\n" |
| 49255 | "/// \\param __b\n" |
| 49256 | "/// A 64-bit vector of [2 x i32] containing one of the source operands. The\n" |
| 49257 | "/// horizontal sums of the values are stored in the upper bits of the\n" |
| 49258 | "/// destination.\n" |
| 49259 | "/// \\returns A 64-bit vector of [2 x i32] containing the horizontal sums of both\n" |
| 49260 | "/// operands.\n" |
| 49261 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 49262 | "_mm_hadd_pi32(__m64 __a, __m64 __b)\n" |
| 49263 | "{\n" |
| 49264 | " return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);\n" |
| 49265 | "}\n" |
| 49266 | "\n" |
| 49267 | "/// Horizontally adds the adjacent pairs of values contained in 2 packed\n" |
| 49268 | "/// 128-bit vectors of [8 x i16]. Positive sums greater than 0x7FFF are\n" |
| 49269 | "/// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to\n" |
| 49270 | "/// 0x8000.\n" |
| 49271 | "///\n" |
| 49272 | "/// \\headerfile <x86intrin.h>\n" |
| 49273 | "///\n" |
| 49274 | "/// This intrinsic corresponds to the \\c VPHADDSW instruction.\n" |
| 49275 | "///\n" |
| 49276 | "/// \\param __a\n" |
| 49277 | "/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n" |
| 49278 | "/// horizontal sums of the values are stored in the lower bits of the\n" |
| 49279 | "/// destination.\n" |
| 49280 | "/// \\param __b\n" |
| 49281 | "/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n" |
| 49282 | "/// horizontal sums of the values are stored in the upper bits of the\n" |
| 49283 | "/// destination.\n" |
| 49284 | "/// \\returns A 128-bit vector of [8 x i16] containing the horizontal saturated\n" |
| 49285 | "/// sums of both operands.\n" |
| 49286 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 49287 | "_mm_hadds_epi16(__m128i __a, __m128i __b)\n" |
| 49288 | "{\n" |
| 49289 | " return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);\n" |
| 49290 | "}\n" |
| 49291 | "\n" |
| 49292 | "/// Horizontally adds the adjacent pairs of values contained in 2 packed\n" |
| 49293 | "/// 64-bit vectors of [4 x i16]. Positive sums greater than 0x7FFF are\n" |
| 49294 | "/// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to\n" |
| 49295 | "/// 0x8000.\n" |
| 49296 | "///\n" |
| 49297 | "/// \\headerfile <x86intrin.h>\n" |
| 49298 | "///\n" |
| 49299 | "/// This intrinsic corresponds to the \\c PHADDSW instruction.\n" |
| 49300 | "///\n" |
| 49301 | "/// \\param __a\n" |
| 49302 | "/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n" |
| 49303 | "/// horizontal sums of the values are stored in the lower bits of the\n" |
| 49304 | "/// destination.\n" |
| 49305 | "/// \\param __b\n" |
| 49306 | "/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n" |
| 49307 | "/// horizontal sums of the values are stored in the upper bits of the\n" |
| 49308 | "/// destination.\n" |
| 49309 | "/// \\returns A 64-bit vector of [4 x i16] containing the horizontal saturated\n" |
| 49310 | "/// sums of both operands.\n" |
| 49311 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 49312 | "_mm_hadds_pi16(__m64 __a, __m64 __b)\n" |
| 49313 | "{\n" |
| 49314 | " return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);\n" |
| 49315 | "}\n" |
| 49316 | "\n" |
| 49317 | "/// Horizontally subtracts the adjacent pairs of values contained in 2\n" |
| 49318 | "/// packed 128-bit vectors of [8 x i16].\n" |
| 49319 | "///\n" |
| 49320 | "/// \\headerfile <x86intrin.h>\n" |
| 49321 | "///\n" |
| 49322 | "/// This intrinsic corresponds to the \\c VPHSUBW instruction.\n" |
| 49323 | "///\n" |
| 49324 | "/// \\param __a\n" |
| 49325 | "/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n" |
| 49326 | "/// horizontal differences between the values are stored in the lower bits of\n" |
| 49327 | "/// the destination.\n" |
| 49328 | "/// \\param __b\n" |
| 49329 | "/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n" |
| 49330 | "/// horizontal differences between the values are stored in the upper bits of\n" |
| 49331 | "/// the destination.\n" |
| 49332 | "/// \\returns A 128-bit vector of [8 x i16] containing the horizontal differences\n" |
| 49333 | "/// of both operands.\n" |
| 49334 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 49335 | "_mm_hsub_epi16(__m128i __a, __m128i __b)\n" |
| 49336 | "{\n" |
| 49337 | " return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);\n" |
| 49338 | "}\n" |
| 49339 | "\n" |
| 49340 | "/// Horizontally subtracts the adjacent pairs of values contained in 2\n" |
| 49341 | "/// packed 128-bit vectors of [4 x i32].\n" |
| 49342 | "///\n" |
| 49343 | "/// \\headerfile <x86intrin.h>\n" |
| 49344 | "///\n" |
| 49345 | "/// This intrinsic corresponds to the \\c VPHSUBD instruction.\n" |
| 49346 | "///\n" |
| 49347 | "/// \\param __a\n" |
| 49348 | "/// A 128-bit vector of [4 x i32] containing one of the source operands. The\n" |
| 49349 | "/// horizontal differences between the values are stored in the lower bits of\n" |
| 49350 | "/// the destination.\n" |
| 49351 | "/// \\param __b\n" |
| 49352 | "/// A 128-bit vector of [4 x i32] containing one of the source operands. The\n" |
| 49353 | "/// horizontal differences between the values are stored in the upper bits of\n" |
| 49354 | "/// the destination.\n" |
| 49355 | "/// \\returns A 128-bit vector of [4 x i32] containing the horizontal differences\n" |
| 49356 | "/// of both operands.\n" |
| 49357 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 49358 | "_mm_hsub_epi32(__m128i __a, __m128i __b)\n" |
| 49359 | "{\n" |
| 49360 | " return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);\n" |
| 49361 | "}\n" |
| 49362 | "\n" |
| 49363 | "/// Horizontally subtracts the adjacent pairs of values contained in 2\n" |
| 49364 | "/// packed 64-bit vectors of [4 x i16].\n" |
| 49365 | "///\n" |
| 49366 | "/// \\headerfile <x86intrin.h>\n" |
| 49367 | "///\n" |
| 49368 | "/// This intrinsic corresponds to the \\c PHSUBW instruction.\n" |
| 49369 | "///\n" |
| 49370 | "/// \\param __a\n" |
| 49371 | "/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n" |
| 49372 | "/// horizontal differences between the values are stored in the lower bits of\n" |
| 49373 | "/// the destination.\n" |
| 49374 | "/// \\param __b\n" |
| 49375 | "/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n" |
| 49376 | "/// horizontal differences between the values are stored in the upper bits of\n" |
| 49377 | "/// the destination.\n" |
| 49378 | "/// \\returns A 64-bit vector of [4 x i16] containing the horizontal differences\n" |
| 49379 | "/// of both operands.\n" |
| 49380 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 49381 | "_mm_hsub_pi16(__m64 __a, __m64 __b)\n" |
| 49382 | "{\n" |
| 49383 | " return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);\n" |
| 49384 | "}\n" |
| 49385 | "\n" |
| 49386 | "/// Horizontally subtracts the adjacent pairs of values contained in 2\n" |
| 49387 | "/// packed 64-bit vectors of [2 x i32].\n" |
| 49388 | "///\n" |
| 49389 | "/// \\headerfile <x86intrin.h>\n" |
| 49390 | "///\n" |
| 49391 | "/// This intrinsic corresponds to the \\c PHSUBD instruction.\n" |
| 49392 | "///\n" |
| 49393 | "/// \\param __a\n" |
| 49394 | "/// A 64-bit vector of [2 x i32] containing one of the source operands. The\n" |
| 49395 | "/// horizontal differences between the values are stored in the lower bits of\n" |
| 49396 | "/// the destination.\n" |
| 49397 | "/// \\param __b\n" |
| 49398 | "/// A 64-bit vector of [2 x i32] containing one of the source operands. The\n" |
| 49399 | "/// horizontal differences between the values are stored in the upper bits of\n" |
| 49400 | "/// the destination.\n" |
| 49401 | "/// \\returns A 64-bit vector of [2 x i32] containing the horizontal differences\n" |
| 49402 | "/// of both operands.\n" |
| 49403 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 49404 | "_mm_hsub_pi32(__m64 __a, __m64 __b)\n" |
| 49405 | "{\n" |
| 49406 | " return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);\n" |
| 49407 | "}\n" |
| 49408 | "\n" |
| 49409 | "/// Horizontally subtracts the adjacent pairs of values contained in 2\n" |
| 49410 | "/// packed 128-bit vectors of [8 x i16]. Positive differences greater than\n" |
| 49411 | "/// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are\n" |
| 49412 | "/// saturated to 0x8000.\n" |
| 49413 | "///\n" |
| 49414 | "/// \\headerfile <x86intrin.h>\n" |
| 49415 | "///\n" |
| 49416 | "/// This intrinsic corresponds to the \\c VPHSUBSW instruction.\n" |
| 49417 | "///\n" |
| 49418 | "/// \\param __a\n" |
| 49419 | "/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n" |
| 49420 | "/// horizontal differences between the values are stored in the lower bits of\n" |
| 49421 | "/// the destination.\n" |
| 49422 | "/// \\param __b\n" |
| 49423 | "/// A 128-bit vector of [8 x i16] containing one of the source operands. The\n" |
| 49424 | "/// horizontal differences between the values are stored in the upper bits of\n" |
| 49425 | "/// the destination.\n" |
| 49426 | "/// \\returns A 128-bit vector of [8 x i16] containing the horizontal saturated\n" |
| 49427 | "/// differences of both operands.\n" |
| 49428 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 49429 | "_mm_hsubs_epi16(__m128i __a, __m128i __b)\n" |
| 49430 | "{\n" |
| 49431 | " return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);\n" |
| 49432 | "}\n" |
| 49433 | "\n" |
| 49434 | "/// Horizontally subtracts the adjacent pairs of values contained in 2\n" |
| 49435 | "/// packed 64-bit vectors of [4 x i16]. Positive differences greater than\n" |
| 49436 | "/// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are\n" |
| 49437 | "/// saturated to 0x8000.\n" |
| 49438 | "///\n" |
| 49439 | "/// \\headerfile <x86intrin.h>\n" |
| 49440 | "///\n" |
| 49441 | "/// This intrinsic corresponds to the \\c PHSUBSW instruction.\n" |
| 49442 | "///\n" |
| 49443 | "/// \\param __a\n" |
| 49444 | "/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n" |
| 49445 | "/// horizontal differences between the values are stored in the lower bits of\n" |
| 49446 | "/// the destination.\n" |
| 49447 | "/// \\param __b\n" |
| 49448 | "/// A 64-bit vector of [4 x i16] containing one of the source operands. The\n" |
| 49449 | "/// horizontal differences between the values are stored in the upper bits of\n" |
| 49450 | "/// the destination.\n" |
| 49451 | "/// \\returns A 64-bit vector of [4 x i16] containing the horizontal saturated\n" |
| 49452 | "/// differences of both operands.\n" |
| 49453 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 49454 | "_mm_hsubs_pi16(__m64 __a, __m64 __b)\n" |
| 49455 | "{\n" |
| 49456 | " return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);\n" |
| 49457 | "}\n" |
| 49458 | "\n" |
| 49459 | "/// Multiplies corresponding pairs of packed 8-bit unsigned integer\n" |
| 49460 | "/// values contained in the first source operand and packed 8-bit signed\n" |
| 49461 | "/// integer values contained in the second source operand, adds pairs of\n" |
| 49462 | "/// contiguous products with signed saturation, and writes the 16-bit sums to\n" |
| 49463 | "/// the corresponding bits in the destination.\n" |
| 49464 | "///\n" |
| 49465 | "/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of\n" |
| 49466 | "/// both operands are multiplied, and the sum of both results is written to\n" |
| 49467 | "/// bits [15:0] of the destination.\n" |
| 49468 | "///\n" |
| 49469 | "/// \\headerfile <x86intrin.h>\n" |
| 49470 | "///\n" |
| 49471 | "/// This intrinsic corresponds to the \\c VPMADDUBSW instruction.\n" |
| 49472 | "///\n" |
| 49473 | "/// \\param __a\n" |
| 49474 | "/// A 128-bit integer vector containing the first source operand.\n" |
| 49475 | "/// \\param __b\n" |
| 49476 | "/// A 128-bit integer vector containing the second source operand.\n" |
| 49477 | "/// \\returns A 128-bit integer vector containing the sums of products of both\n" |
| 49478 | "/// operands: \\n\n" |
| 49479 | "/// \\a R0 := (\\a __a0 * \\a __b0) + (\\a __a1 * \\a __b1) \\n\n" |
| 49480 | "/// \\a R1 := (\\a __a2 * \\a __b2) + (\\a __a3 * \\a __b3) \\n\n" |
| 49481 | "/// \\a R2 := (\\a __a4 * \\a __b4) + (\\a __a5 * \\a __b5) \\n\n" |
| 49482 | "/// \\a R3 := (\\a __a6 * \\a __b6) + (\\a __a7 * \\a __b7) \\n\n" |
| 49483 | "/// \\a R4 := (\\a __a8 * \\a __b8) + (\\a __a9 * \\a __b9) \\n\n" |
| 49484 | "/// \\a R5 := (\\a __a10 * \\a __b10) + (\\a __a11 * \\a __b11) \\n\n" |
| 49485 | "/// \\a R6 := (\\a __a12 * \\a __b12) + (\\a __a13 * \\a __b13) \\n\n" |
| 49486 | "/// \\a R7 := (\\a __a14 * \\a __b14) + (\\a __a15 * \\a __b15)\n" |
| 49487 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 49488 | "_mm_maddubs_epi16(__m128i __a, __m128i __b)\n" |
| 49489 | "{\n" |
| 49490 | " return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);\n" |
| 49491 | "}\n" |
| 49492 | "\n" |
| 49493 | "/// Multiplies corresponding pairs of packed 8-bit unsigned integer\n" |
| 49494 | "/// values contained in the first source operand and packed 8-bit signed\n" |
| 49495 | "/// integer values contained in the second source operand, adds pairs of\n" |
| 49496 | "/// contiguous products with signed saturation, and writes the 16-bit sums to\n" |
| 49497 | "/// the corresponding bits in the destination.\n" |
| 49498 | "///\n" |
| 49499 | "/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of\n" |
| 49500 | "/// both operands are multiplied, and the sum of both results is written to\n" |
| 49501 | "/// bits [15:0] of the destination.\n" |
| 49502 | "///\n" |
| 49503 | "/// \\headerfile <x86intrin.h>\n" |
| 49504 | "///\n" |
| 49505 | "/// This intrinsic corresponds to the \\c PMADDUBSW instruction.\n" |
| 49506 | "///\n" |
| 49507 | "/// \\param __a\n" |
| 49508 | "/// A 64-bit integer vector containing the first source operand.\n" |
| 49509 | "/// \\param __b\n" |
| 49510 | "/// A 64-bit integer vector containing the second source operand.\n" |
| 49511 | "/// \\returns A 64-bit integer vector containing the sums of products of both\n" |
| 49512 | "/// operands: \\n\n" |
| 49513 | "/// \\a R0 := (\\a __a0 * \\a __b0) + (\\a __a1 * \\a __b1) \\n\n" |
| 49514 | "/// \\a R1 := (\\a __a2 * \\a __b2) + (\\a __a3 * \\a __b3) \\n\n" |
| 49515 | "/// \\a R2 := (\\a __a4 * \\a __b4) + (\\a __a5 * \\a __b5) \\n\n" |
| 49516 | "/// \\a R3 := (\\a __a6 * \\a __b6) + (\\a __a7 * \\a __b7)\n" |
| 49517 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 49518 | "_mm_maddubs_pi16(__m64 __a, __m64 __b)\n" |
| 49519 | "{\n" |
| 49520 | " return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);\n" |
| 49521 | "}\n" |
| 49522 | "\n" |
| 49523 | "/// Multiplies packed 16-bit signed integer values, truncates the 32-bit\n" |
| 49524 | "/// products to the 18 most significant bits by right-shifting, rounds the\n" |
| 49525 | "/// truncated value by adding 1, and writes bits [16:1] to the destination.\n" |
| 49526 | "///\n" |
| 49527 | "/// \\headerfile <x86intrin.h>\n" |
| 49528 | "///\n" |
| 49529 | "/// This intrinsic corresponds to the \\c VPMULHRSW instruction.\n" |
| 49530 | "///\n" |
| 49531 | "/// \\param __a\n" |
| 49532 | "/// A 128-bit vector of [8 x i16] containing one of the source operands.\n" |
| 49533 | "/// \\param __b\n" |
| 49534 | "/// A 128-bit vector of [8 x i16] containing one of the source operands.\n" |
| 49535 | "/// \\returns A 128-bit vector of [8 x i16] containing the rounded and scaled\n" |
| 49536 | "/// products of both operands.\n" |
| 49537 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 49538 | "_mm_mulhrs_epi16(__m128i __a, __m128i __b)\n" |
| 49539 | "{\n" |
| 49540 | " return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);\n" |
| 49541 | "}\n" |
| 49542 | "\n" |
| 49543 | "/// Multiplies packed 16-bit signed integer values, truncates the 32-bit\n" |
| 49544 | "/// products to the 18 most significant bits by right-shifting, rounds the\n" |
| 49545 | "/// truncated value by adding 1, and writes bits [16:1] to the destination.\n" |
| 49546 | "///\n" |
| 49547 | "/// \\headerfile <x86intrin.h>\n" |
| 49548 | "///\n" |
| 49549 | "/// This intrinsic corresponds to the \\c PMULHRSW instruction.\n" |
| 49550 | "///\n" |
| 49551 | "/// \\param __a\n" |
| 49552 | "/// A 64-bit vector of [4 x i16] containing one of the source operands.\n" |
| 49553 | "/// \\param __b\n" |
| 49554 | "/// A 64-bit vector of [4 x i16] containing one of the source operands.\n" |
| 49555 | "/// \\returns A 64-bit vector of [4 x i16] containing the rounded and scaled\n" |
| 49556 | "/// products of both operands.\n" |
| 49557 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 49558 | "_mm_mulhrs_pi16(__m64 __a, __m64 __b)\n" |
| 49559 | "{\n" |
| 49560 | " return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);\n" |
| 49561 | "}\n" |
| 49562 | "\n" |
| 49563 | "/// Copies the 8-bit integers from a 128-bit integer vector to the\n" |
| 49564 | "/// destination or clears 8-bit values in the destination, as specified by\n" |
| 49565 | "/// the second source operand.\n" |
| 49566 | "///\n" |
| 49567 | "/// \\headerfile <x86intrin.h>\n" |
| 49568 | "///\n" |
| 49569 | "/// This intrinsic corresponds to the \\c VPSHUFB instruction.\n" |
| 49570 | "///\n" |
| 49571 | "/// \\param __a\n" |
| 49572 | "/// A 128-bit integer vector containing the values to be copied.\n" |
| 49573 | "/// \\param __b\n" |
| 49574 | "/// A 128-bit integer vector containing control bytes corresponding to\n" |
| 49575 | "/// positions in the destination:\n" |
| 49576 | "/// Bit 7: \\n\n" |
| 49577 | "/// 1: Clear the corresponding byte in the destination. \\n\n" |
| 49578 | "/// 0: Copy the selected source byte to the corresponding byte in the\n" |
| 49579 | "/// destination. \\n\n" |
| 49580 | "/// Bits [6:4] Reserved. \\n\n" |
| 49581 | "/// Bits [3:0] select the source byte to be copied.\n" |
| 49582 | "/// \\returns A 128-bit integer vector containing the copied or cleared values.\n" |
| 49583 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 49584 | "_mm_shuffle_epi8(__m128i __a, __m128i __b)\n" |
| 49585 | "{\n" |
| 49586 | " return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);\n" |
| 49587 | "}\n" |
| 49588 | "\n" |
| 49589 | "/// Copies the 8-bit integers from a 64-bit integer vector to the\n" |
| 49590 | "/// destination or clears 8-bit values in the destination, as specified by\n" |
| 49591 | "/// the second source operand.\n" |
| 49592 | "///\n" |
| 49593 | "/// \\headerfile <x86intrin.h>\n" |
| 49594 | "///\n" |
| 49595 | "/// This intrinsic corresponds to the \\c PSHUFB instruction.\n" |
| 49596 | "///\n" |
| 49597 | "/// \\param __a\n" |
| 49598 | "/// A 64-bit integer vector containing the values to be copied.\n" |
| 49599 | "/// \\param __b\n" |
| 49600 | "/// A 64-bit integer vector containing control bytes corresponding to\n" |
| 49601 | "/// positions in the destination:\n" |
| 49602 | "/// Bit 7: \\n\n" |
| 49603 | "/// 1: Clear the corresponding byte in the destination. \\n\n" |
| 49604 | "/// 0: Copy the selected source byte to the corresponding byte in the\n" |
| 49605 | "/// destination. \\n\n" |
| 49606 | "/// Bits [3:0] select the source byte to be copied.\n" |
| 49607 | "/// \\returns A 64-bit integer vector containing the copied or cleared values.\n" |
| 49608 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 49609 | "_mm_shuffle_pi8(__m64 __a, __m64 __b)\n" |
| 49610 | "{\n" |
| 49611 | " return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);\n" |
| 49612 | "}\n" |
| 49613 | "\n" |
| 49614 | "/// For each 8-bit integer in the first source operand, perform one of\n" |
| 49615 | "/// the following actions as specified by the second source operand.\n" |
| 49616 | "///\n" |
| 49617 | "/// If the byte in the second source is negative, calculate the two's\n" |
| 49618 | "/// complement of the corresponding byte in the first source, and write that\n" |
| 49619 | "/// value to the destination. If the byte in the second source is positive,\n" |
| 49620 | "/// copy the corresponding byte from the first source to the destination. If\n" |
| 49621 | "/// the byte in the second source is zero, clear the corresponding byte in\n" |
| 49622 | "/// the destination.\n" |
| 49623 | "///\n" |
| 49624 | "/// \\headerfile <x86intrin.h>\n" |
| 49625 | "///\n" |
| 49626 | "/// This intrinsic corresponds to the \\c VPSIGNB instruction.\n" |
| 49627 | "///\n" |
| 49628 | "/// \\param __a\n" |
| 49629 | "/// A 128-bit integer vector containing the values to be copied.\n" |
| 49630 | "/// \\param __b\n" |
| 49631 | "/// A 128-bit integer vector containing control bytes corresponding to\n" |
| 49632 | "/// positions in the destination.\n" |
| 49633 | "/// \\returns A 128-bit integer vector containing the resultant values.\n" |
| 49634 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 49635 | "_mm_sign_epi8(__m128i __a, __m128i __b)\n" |
| 49636 | "{\n" |
| 49637 | " return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);\n" |
| 49638 | "}\n" |
| 49639 | "\n" |
| 49640 | "/// For each 16-bit integer in the first source operand, perform one of\n" |
| 49641 | "/// the following actions as specified by the second source operand.\n" |
| 49642 | "///\n" |
| 49643 | "/// If the word in the second source is negative, calculate the two's\n" |
| 49644 | "/// complement of the corresponding word in the first source, and write that\n" |
| 49645 | "/// value to the destination. If the word in the second source is positive,\n" |
| 49646 | "/// copy the corresponding word from the first source to the destination. If\n" |
| 49647 | "/// the word in the second source is zero, clear the corresponding word in\n" |
| 49648 | "/// the destination.\n" |
| 49649 | "///\n" |
| 49650 | "/// \\headerfile <x86intrin.h>\n" |
| 49651 | "///\n" |
| 49652 | "/// This intrinsic corresponds to the \\c VPSIGNW instruction.\n" |
| 49653 | "///\n" |
| 49654 | "/// \\param __a\n" |
| 49655 | "/// A 128-bit integer vector containing the values to be copied.\n" |
| 49656 | "/// \\param __b\n" |
| 49657 | "/// A 128-bit integer vector containing control words corresponding to\n" |
| 49658 | "/// positions in the destination.\n" |
| 49659 | "/// \\returns A 128-bit integer vector containing the resultant values.\n" |
| 49660 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 49661 | "_mm_sign_epi16(__m128i __a, __m128i __b)\n" |
| 49662 | "{\n" |
| 49663 | " return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);\n" |
| 49664 | "}\n" |
| 49665 | "\n" |
| 49666 | "/// For each 32-bit integer in the first source operand, perform one of\n" |
| 49667 | "/// the following actions as specified by the second source operand.\n" |
| 49668 | "///\n" |
| 49669 | "/// If the doubleword in the second source is negative, calculate the two's\n" |
| 49670 | "/// complement of the corresponding word in the first source, and write that\n" |
| 49671 | "/// value to the destination. If the doubleword in the second source is\n" |
| 49672 | "/// positive, copy the corresponding word from the first source to the\n" |
| 49673 | "/// destination. If the doubleword in the second source is zero, clear the\n" |
| 49674 | "/// corresponding word in the destination.\n" |
| 49675 | "///\n" |
| 49676 | "/// \\headerfile <x86intrin.h>\n" |
| 49677 | "///\n" |
| 49678 | "/// This intrinsic corresponds to the \\c VPSIGND instruction.\n" |
| 49679 | "///\n" |
| 49680 | "/// \\param __a\n" |
| 49681 | "/// A 128-bit integer vector containing the values to be copied.\n" |
| 49682 | "/// \\param __b\n" |
| 49683 | "/// A 128-bit integer vector containing control doublewords corresponding to\n" |
| 49684 | "/// positions in the destination.\n" |
| 49685 | "/// \\returns A 128-bit integer vector containing the resultant values.\n" |
| 49686 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 49687 | "_mm_sign_epi32(__m128i __a, __m128i __b)\n" |
| 49688 | "{\n" |
| 49689 | " return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);\n" |
| 49690 | "}\n" |
| 49691 | "\n" |
| 49692 | "/// For each 8-bit integer in the first source operand, perform one of\n" |
| 49693 | "/// the following actions as specified by the second source operand.\n" |
| 49694 | "///\n" |
| 49695 | "/// If the byte in the second source is negative, calculate the two's\n" |
| 49696 | "/// complement of the corresponding byte in the first source, and write that\n" |
| 49697 | "/// value to the destination. If the byte in the second source is positive,\n" |
| 49698 | "/// copy the corresponding byte from the first source to the destination. If\n" |
| 49699 | "/// the byte in the second source is zero, clear the corresponding byte in\n" |
| 49700 | "/// the destination.\n" |
| 49701 | "///\n" |
| 49702 | "/// \\headerfile <x86intrin.h>\n" |
| 49703 | "///\n" |
| 49704 | "/// This intrinsic corresponds to the \\c PSIGNB instruction.\n" |
| 49705 | "///\n" |
| 49706 | "/// \\param __a\n" |
| 49707 | "/// A 64-bit integer vector containing the values to be copied.\n" |
| 49708 | "/// \\param __b\n" |
| 49709 | "/// A 64-bit integer vector containing control bytes corresponding to\n" |
| 49710 | "/// positions in the destination.\n" |
| 49711 | "/// \\returns A 64-bit integer vector containing the resultant values.\n" |
| 49712 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 49713 | "_mm_sign_pi8(__m64 __a, __m64 __b)\n" |
| 49714 | "{\n" |
| 49715 | " return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);\n" |
| 49716 | "}\n" |
| 49717 | "\n" |
| 49718 | "/// For each 16-bit integer in the first source operand, perform one of\n" |
| 49719 | "/// the following actions as specified by the second source operand.\n" |
| 49720 | "///\n" |
| 49721 | "/// If the word in the second source is negative, calculate the two's\n" |
| 49722 | "/// complement of the corresponding word in the first source, and write that\n" |
| 49723 | "/// value to the destination. If the word in the second source is positive,\n" |
| 49724 | "/// copy the corresponding word from the first source to the destination. If\n" |
| 49725 | "/// the word in the second source is zero, clear the corresponding word in\n" |
| 49726 | "/// the destination.\n" |
| 49727 | "///\n" |
| 49728 | "/// \\headerfile <x86intrin.h>\n" |
| 49729 | "///\n" |
| 49730 | "/// This intrinsic corresponds to the \\c PSIGNW instruction.\n" |
| 49731 | "///\n" |
| 49732 | "/// \\param __a\n" |
| 49733 | "/// A 64-bit integer vector containing the values to be copied.\n" |
| 49734 | "/// \\param __b\n" |
| 49735 | "/// A 64-bit integer vector containing control words corresponding to\n" |
| 49736 | "/// positions in the destination.\n" |
| 49737 | "/// \\returns A 64-bit integer vector containing the resultant values.\n" |
| 49738 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 49739 | "_mm_sign_pi16(__m64 __a, __m64 __b)\n" |
| 49740 | "{\n" |
| 49741 | " return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);\n" |
| 49742 | "}\n" |
| 49743 | "\n" |
| 49744 | "/// For each 32-bit integer in the first source operand, perform one of\n" |
| 49745 | "/// the following actions as specified by the second source operand.\n" |
| 49746 | "///\n" |
| 49747 | "/// If the doubleword in the second source is negative, calculate the two's\n" |
| 49748 | "/// complement of the corresponding doubleword in the first source, and\n" |
| 49749 | "/// write that value to the destination. If the doubleword in the second\n" |
| 49750 | "/// source is positive, copy the corresponding doubleword from the first\n" |
| 49751 | "/// source to the destination. If the doubleword in the second source is\n" |
| 49752 | "/// zero, clear the corresponding doubleword in the destination.\n" |
| 49753 | "///\n" |
| 49754 | "/// \\headerfile <x86intrin.h>\n" |
| 49755 | "///\n" |
| 49756 | "/// This intrinsic corresponds to the \\c PSIGND instruction.\n" |
| 49757 | "///\n" |
| 49758 | "/// \\param __a\n" |
| 49759 | "/// A 64-bit integer vector containing the values to be copied.\n" |
| 49760 | "/// \\param __b\n" |
| 49761 | "/// A 64-bit integer vector containing two control doublewords corresponding\n" |
| 49762 | "/// to positions in the destination.\n" |
| 49763 | "/// \\returns A 64-bit integer vector containing the resultant values.\n" |
| 49764 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 49765 | "_mm_sign_pi32(__m64 __a, __m64 __b)\n" |
| 49766 | "{\n" |
| 49767 | " return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);\n" |
| 49768 | "}\n" |
| 49769 | "\n" |
| 49770 | "#undef __DEFAULT_FN_ATTRS\n" |
| 49771 | "#undef __DEFAULT_FN_ATTRS_MMX\n" |
| 49772 | "\n" |
| 49773 | "#endif /* __TMMINTRIN_H */\n" |
| 49774 | "" } , |
| 49775 | { "/builtins/unwind.h" , "/*===---- unwind.h - Stack unwinding ----------------------------------------===\n" |
| 49776 | " *\n" |
| 49777 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 49778 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 49779 | " * in the Software without restriction, including without limitation the rights\n" |
| 49780 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 49781 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 49782 | " * furnished to do so, subject to the following conditions:\n" |
| 49783 | " *\n" |
| 49784 | " * The above copyright notice and this permission notice shall be included in\n" |
| 49785 | " * all copies or substantial portions of the Software.\n" |
| 49786 | " *\n" |
| 49787 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 49788 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 49789 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 49790 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 49791 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 49792 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 49793 | " * THE SOFTWARE.\n" |
| 49794 | " *\n" |
| 49795 | " *===-----------------------------------------------------------------------===\n" |
| 49796 | " */\n" |
| 49797 | "\n" |
| 49798 | "/* See \"Data Definitions for libgcc_s\" in the Linux Standard Base.*/\n" |
| 49799 | "\n" |
| 49800 | "#if defined(__APPLE__) && __has_include_next(<unwind.h>)\n" |
| 49801 | "/* Darwin (from 11.x on) provide an unwind.h. If that's available,\n" |
| 49802 | " * use it. libunwind wraps some of its definitions in #ifdef _GNU_SOURCE,\n" |
| 49803 | " * so define that around the include.*/\n" |
| 49804 | "# ifndef _GNU_SOURCE\n" |
| 49805 | "# define _SHOULD_UNDEFINE_GNU_SOURCE\n" |
| 49806 | "# define _GNU_SOURCE\n" |
| 49807 | "# endif\n" |
| 49808 | "// libunwind's unwind.h reflects the current visibility. However, Mozilla\n" |
| 49809 | "// builds with -fvisibility=hidden and relies on gcc's unwind.h to reset the\n" |
| 49810 | "// visibility to default and export its contents. gcc also allows users to\n" |
| 49811 | "// override its override by #defining HIDE_EXPORTS (but note, this only obeys\n" |
| 49812 | "// the user's -fvisibility setting; it doesn't hide any exports on its own). We\n" |
| 49813 | "// imitate gcc's header here:\n" |
| 49814 | "# ifdef HIDE_EXPORTS\n" |
| 49815 | "# include_next <unwind.h>\n" |
| 49816 | "# else\n" |
| 49817 | "# pragma GCC visibility push(default)\n" |
| 49818 | "# include_next <unwind.h>\n" |
| 49819 | "# pragma GCC visibility pop\n" |
| 49820 | "# endif\n" |
| 49821 | "# ifdef _SHOULD_UNDEFINE_GNU_SOURCE\n" |
| 49822 | "# undef _GNU_SOURCE\n" |
| 49823 | "# undef _SHOULD_UNDEFINE_GNU_SOURCE\n" |
| 49824 | "# endif\n" |
| 49825 | "#else\n" |
| 49826 | "\n" |
| 49827 | "#ifndef __CLANG_UNWIND_H\n" |
| 49828 | "#define __CLANG_UNWIND_H\n" |
| 49829 | "\n" |
| 49830 | "#include <stdint.h>\n" |
| 49831 | "\n" |
| 49832 | "#ifdef __cplusplus\n" |
| 49833 | "extern \"C\" {\n" |
| 49834 | "#endif\n" |
| 49835 | "\n" |
| 49836 | "/* It is a bit strange for a header to play with the visibility of the\n" |
| 49837 | " symbols it declares, but this matches gcc's behavior and some programs\n" |
| 49838 | " depend on it */\n" |
| 49839 | "#ifndef HIDE_EXPORTS\n" |
| 49840 | "#pragma GCC visibility push(default)\n" |
| 49841 | "#endif\n" |
| 49842 | "\n" |
| 49843 | "typedef uintptr_t _Unwind_Word;\n" |
| 49844 | "typedef intptr_t _Unwind_Sword;\n" |
| 49845 | "typedef uintptr_t _Unwind_Ptr;\n" |
| 49846 | "typedef uintptr_t _Unwind_Internal_Ptr;\n" |
| 49847 | "typedef uint64_t _Unwind_Exception_Class;\n" |
| 49848 | "\n" |
| 49849 | "typedef intptr_t _sleb128_t;\n" |
| 49850 | "typedef uintptr_t _uleb128_t;\n" |
| 49851 | "\n" |
| 49852 | "struct _Unwind_Context;\n" |
| 49853 | "#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__))\n" |
| 49854 | "struct _Unwind_Control_Block;\n" |
| 49855 | "typedef struct _Unwind_Control_Block _Unwind_Exception; /* Alias */\n" |
| 49856 | "#else\n" |
| 49857 | "struct _Unwind_Exception;\n" |
| 49858 | "typedef struct _Unwind_Exception _Unwind_Exception;\n" |
| 49859 | "#endif\n" |
| 49860 | "typedef enum {\n" |
| 49861 | " _URC_NO_REASON = 0,\n" |
| 49862 | "#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \\\n" |
| 49863 | " !defined(__ARM_DWARF_EH__)\n" |
| 49864 | " _URC_OK = 0, /* used by ARM EHABI */\n" |
| 49865 | "#endif\n" |
| 49866 | " _URC_FOREIGN_EXCEPTION_CAUGHT = 1,\n" |
| 49867 | "\n" |
| 49868 | " _URC_FATAL_PHASE2_ERROR = 2,\n" |
| 49869 | " _URC_FATAL_PHASE1_ERROR = 3,\n" |
| 49870 | " _URC_NORMAL_STOP = 4,\n" |
| 49871 | "\n" |
| 49872 | " _URC_END_OF_STACK = 5,\n" |
| 49873 | " _URC_HANDLER_FOUND = 6,\n" |
| 49874 | " _URC_INSTALL_CONTEXT = 7,\n" |
| 49875 | " _URC_CONTINUE_UNWIND = 8,\n" |
| 49876 | "#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \\\n" |
| 49877 | " !defined(__ARM_DWARF_EH__)\n" |
| 49878 | " _URC_FAILURE = 9 /* used by ARM EHABI */\n" |
| 49879 | "#endif\n" |
| 49880 | "} _Unwind_Reason_Code;\n" |
| 49881 | "\n" |
| 49882 | "typedef enum {\n" |
| 49883 | " _UA_SEARCH_PHASE = 1,\n" |
| 49884 | " _UA_CLEANUP_PHASE = 2,\n" |
| 49885 | "\n" |
| 49886 | " _UA_HANDLER_FRAME = 4,\n" |
| 49887 | " _UA_FORCE_UNWIND = 8,\n" |
| 49888 | " _UA_END_OF_STACK = 16 /* gcc extension to C++ ABI */\n" |
| 49889 | "} _Unwind_Action;\n" |
| 49890 | "\n" |
| 49891 | "typedef void (*_Unwind_Exception_Cleanup_Fn)(_Unwind_Reason_Code,\n" |
| 49892 | " _Unwind_Exception *);\n" |
| 49893 | "\n" |
| 49894 | "#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__))\n" |
| 49895 | "typedef struct _Unwind_Control_Block _Unwind_Control_Block;\n" |
| 49896 | "typedef uint32_t _Unwind_EHT_Header;\n" |
| 49897 | "\n" |
| 49898 | "struct _Unwind_Control_Block {\n" |
| 49899 | " uint64_t exception_class;\n" |
| 49900 | " void (*exception_cleanup)(_Unwind_Reason_Code, _Unwind_Control_Block *);\n" |
| 49901 | " /* unwinder cache (private fields for the unwinder's use) */\n" |
| 49902 | " struct {\n" |
| 49903 | " uint32_t reserved1; /* forced unwind stop function, 0 if not forced */\n" |
| 49904 | " uint32_t reserved2; /* personality routine */\n" |
| 49905 | " uint32_t reserved3; /* callsite */\n" |
| 49906 | " uint32_t reserved4; /* forced unwind stop argument */\n" |
| 49907 | " uint32_t reserved5;\n" |
| 49908 | " } unwinder_cache;\n" |
| 49909 | " /* propagation barrier cache (valid after phase 1) */\n" |
| 49910 | " struct {\n" |
| 49911 | " uint32_t sp;\n" |
| 49912 | " uint32_t bitpattern[5];\n" |
| 49913 | " } barrier_cache;\n" |
| 49914 | " /* cleanup cache (preserved over cleanup) */\n" |
| 49915 | " struct {\n" |
| 49916 | " uint32_t bitpattern[4];\n" |
| 49917 | " } cleanup_cache;\n" |
| 49918 | " /* personality cache (for personality's benefit) */\n" |
| 49919 | " struct {\n" |
| 49920 | " uint32_t fnstart; /* function start address */\n" |
| 49921 | " _Unwind_EHT_Header *ehtp; /* pointer to EHT entry header word */\n" |
| 49922 | " uint32_t additional; /* additional data */\n" |
| 49923 | " uint32_t reserved1;\n" |
| 49924 | " } pr_cache;\n" |
| 49925 | " long long int : 0; /* force alignment of next item to 8-byte boundary */\n" |
| 49926 | "} __attribute__((__aligned__(8)));\n" |
| 49927 | "#else\n" |
| 49928 | "struct _Unwind_Exception {\n" |
| 49929 | " _Unwind_Exception_Class exception_class;\n" |
| 49930 | " _Unwind_Exception_Cleanup_Fn exception_cleanup;\n" |
| 49931 | "#if !defined (__USING_SJLJ_EXCEPTIONS__) && defined (__SEH__)\n" |
| 49932 | " _Unwind_Word private_[6];\n" |
| 49933 | "#else\n" |
| 49934 | " _Unwind_Word private_1;\n" |
| 49935 | " _Unwind_Word private_2;\n" |
| 49936 | "#endif\n" |
| 49937 | " /* The Itanium ABI requires that _Unwind_Exception objects are \"double-word\n" |
| 49938 | " * aligned\". GCC has interpreted this to mean \"use the maximum useful\n" |
| 49939 | " * alignment for the target\"; so do we. */\n" |
| 49940 | "} __attribute__((__aligned__));\n" |
| 49941 | "#endif\n" |
| 49942 | "\n" |
| 49943 | "typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)(int, _Unwind_Action,\n" |
| 49944 | " _Unwind_Exception_Class,\n" |
| 49945 | " _Unwind_Exception *,\n" |
| 49946 | " struct _Unwind_Context *,\n" |
| 49947 | " void *);\n" |
| 49948 | "\n" |
| 49949 | "typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(int, _Unwind_Action,\n" |
| 49950 | " _Unwind_Exception_Class,\n" |
| 49951 | " _Unwind_Exception *,\n" |
| 49952 | " struct _Unwind_Context *);\n" |
| 49953 | "typedef _Unwind_Personality_Fn __personality_routine;\n" |
| 49954 | "\n" |
| 49955 | "typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context *,\n" |
| 49956 | " void *);\n" |
| 49957 | "\n" |
| 49958 | "#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__))\n" |
| 49959 | "typedef enum {\n" |
| 49960 | " _UVRSC_CORE = 0, /* integer register */\n" |
| 49961 | " _UVRSC_VFP = 1, /* vfp */\n" |
| 49962 | " _UVRSC_WMMXD = 3, /* Intel WMMX data register */\n" |
| 49963 | " _UVRSC_WMMXC = 4 /* Intel WMMX control register */\n" |
| 49964 | "} _Unwind_VRS_RegClass;\n" |
| 49965 | "\n" |
| 49966 | "typedef enum {\n" |
| 49967 | " _UVRSD_UINT32 = 0,\n" |
| 49968 | " _UVRSD_VFPX = 1,\n" |
| 49969 | " _UVRSD_UINT64 = 3,\n" |
| 49970 | " _UVRSD_FLOAT = 4,\n" |
| 49971 | " _UVRSD_DOUBLE = 5\n" |
| 49972 | "} _Unwind_VRS_DataRepresentation;\n" |
| 49973 | "\n" |
| 49974 | "typedef enum {\n" |
| 49975 | " _UVRSR_OK = 0,\n" |
| 49976 | " _UVRSR_NOT_IMPLEMENTED = 1,\n" |
| 49977 | " _UVRSR_FAILED = 2\n" |
| 49978 | "} _Unwind_VRS_Result;\n" |
| 49979 | "\n" |
| 49980 | "typedef uint32_t _Unwind_State;\n" |
| 49981 | "#define _US_VIRTUAL_UNWIND_FRAME ((_Unwind_State)0)\n" |
| 49982 | "#define _US_UNWIND_FRAME_STARTING ((_Unwind_State)1)\n" |
| 49983 | "#define _US_UNWIND_FRAME_RESUME ((_Unwind_State)2)\n" |
| 49984 | "#define _US_ACTION_MASK ((_Unwind_State)3)\n" |
| 49985 | "#define _US_FORCE_UNWIND ((_Unwind_State)8)\n" |
| 49986 | "\n" |
| 49987 | "_Unwind_VRS_Result _Unwind_VRS_Get(struct _Unwind_Context *__context,\n" |
| 49988 | " _Unwind_VRS_RegClass __regclass,\n" |
| 49989 | " uint32_t __regno,\n" |
| 49990 | " _Unwind_VRS_DataRepresentation __representation,\n" |
| 49991 | " void *__valuep);\n" |
| 49992 | "\n" |
| 49993 | "_Unwind_VRS_Result _Unwind_VRS_Set(struct _Unwind_Context *__context,\n" |
| 49994 | " _Unwind_VRS_RegClass __regclass,\n" |
| 49995 | " uint32_t __regno,\n" |
| 49996 | " _Unwind_VRS_DataRepresentation __representation,\n" |
| 49997 | " void *__valuep);\n" |
| 49998 | "\n" |
| 49999 | "static __inline__\n" |
| 50000 | "_Unwind_Word _Unwind_GetGR(struct _Unwind_Context *__context, int __index) {\n" |
| 50001 | " _Unwind_Word __value;\n" |
| 50002 | " _Unwind_VRS_Get(__context, _UVRSC_CORE, __index, _UVRSD_UINT32, &__value);\n" |
| 50003 | " return __value;\n" |
| 50004 | "}\n" |
| 50005 | "\n" |
| 50006 | "static __inline__\n" |
| 50007 | "void _Unwind_SetGR(struct _Unwind_Context *__context, int __index,\n" |
| 50008 | " _Unwind_Word __value) {\n" |
| 50009 | " _Unwind_VRS_Set(__context, _UVRSC_CORE, __index, _UVRSD_UINT32, &__value);\n" |
| 50010 | "}\n" |
| 50011 | "\n" |
| 50012 | "static __inline__\n" |
| 50013 | "_Unwind_Word _Unwind_GetIP(struct _Unwind_Context *__context) {\n" |
| 50014 | " _Unwind_Word __ip = _Unwind_GetGR(__context, 15);\n" |
| 50015 | " return __ip & ~(_Unwind_Word)(0x1); /* Remove thumb mode bit. */\n" |
| 50016 | "}\n" |
| 50017 | "\n" |
| 50018 | "static __inline__\n" |
| 50019 | "void _Unwind_SetIP(struct _Unwind_Context *__context, _Unwind_Word __value) {\n" |
| 50020 | " _Unwind_Word __thumb_mode_bit = _Unwind_GetGR(__context, 15) & 0x1;\n" |
| 50021 | " _Unwind_SetGR(__context, 15, __value | __thumb_mode_bit);\n" |
| 50022 | "}\n" |
| 50023 | "#else\n" |
| 50024 | "_Unwind_Word _Unwind_GetGR(struct _Unwind_Context *, int);\n" |
| 50025 | "void _Unwind_SetGR(struct _Unwind_Context *, int, _Unwind_Word);\n" |
| 50026 | "\n" |
| 50027 | "_Unwind_Word _Unwind_GetIP(struct _Unwind_Context *);\n" |
| 50028 | "void _Unwind_SetIP(struct _Unwind_Context *, _Unwind_Word);\n" |
| 50029 | "#endif\n" |
| 50030 | "\n" |
| 50031 | "\n" |
| 50032 | "_Unwind_Word _Unwind_GetIPInfo(struct _Unwind_Context *, int *);\n" |
| 50033 | "\n" |
| 50034 | "_Unwind_Word _Unwind_GetCFA(struct _Unwind_Context *);\n" |
| 50035 | "\n" |
| 50036 | "_Unwind_Word _Unwind_GetBSP(struct _Unwind_Context *);\n" |
| 50037 | "\n" |
| 50038 | "void *_Unwind_GetLanguageSpecificData(struct _Unwind_Context *);\n" |
| 50039 | "\n" |
| 50040 | "_Unwind_Ptr _Unwind_GetRegionStart(struct _Unwind_Context *);\n" |
| 50041 | "\n" |
| 50042 | "/* DWARF EH functions; currently not available on Darwin/ARM */\n" |
| 50043 | "#if !defined(__APPLE__) || !defined(__arm__)\n" |
| 50044 | "_Unwind_Reason_Code _Unwind_RaiseException(_Unwind_Exception *);\n" |
| 50045 | "_Unwind_Reason_Code _Unwind_ForcedUnwind(_Unwind_Exception *, _Unwind_Stop_Fn,\n" |
| 50046 | " void *);\n" |
| 50047 | "void _Unwind_DeleteException(_Unwind_Exception *);\n" |
| 50048 | "void _Unwind_Resume(_Unwind_Exception *);\n" |
| 50049 | "_Unwind_Reason_Code _Unwind_Resume_or_Rethrow(_Unwind_Exception *);\n" |
| 50050 | "\n" |
| 50051 | "#endif\n" |
| 50052 | "\n" |
| 50053 | "_Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, void *);\n" |
| 50054 | "\n" |
| 50055 | "/* setjmp(3)/longjmp(3) stuff */\n" |
| 50056 | "typedef struct SjLj_Function_Context *_Unwind_FunctionContext_t;\n" |
| 50057 | "\n" |
| 50058 | "void _Unwind_SjLj_Register(_Unwind_FunctionContext_t);\n" |
| 50059 | "void _Unwind_SjLj_Unregister(_Unwind_FunctionContext_t);\n" |
| 50060 | "_Unwind_Reason_Code _Unwind_SjLj_RaiseException(_Unwind_Exception *);\n" |
| 50061 | "_Unwind_Reason_Code _Unwind_SjLj_ForcedUnwind(_Unwind_Exception *,\n" |
| 50062 | " _Unwind_Stop_Fn, void *);\n" |
| 50063 | "void _Unwind_SjLj_Resume(_Unwind_Exception *);\n" |
| 50064 | "_Unwind_Reason_Code _Unwind_SjLj_Resume_or_Rethrow(_Unwind_Exception *);\n" |
| 50065 | "\n" |
| 50066 | "void *_Unwind_FindEnclosingFunction(void *);\n" |
| 50067 | "\n" |
| 50068 | "#ifdef __APPLE__\n" |
| 50069 | "\n" |
| 50070 | "_Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *)\n" |
| 50071 | " __attribute__((__unavailable__));\n" |
| 50072 | "_Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *)\n" |
| 50073 | " __attribute__((__unavailable__));\n" |
| 50074 | "\n" |
| 50075 | "/* Darwin-specific functions */\n" |
| 50076 | "void __register_frame(const void *);\n" |
| 50077 | "void __deregister_frame(const void *);\n" |
| 50078 | "\n" |
| 50079 | "struct dwarf_eh_bases {\n" |
| 50080 | " uintptr_t tbase;\n" |
| 50081 | " uintptr_t dbase;\n" |
| 50082 | " uintptr_t func;\n" |
| 50083 | "};\n" |
| 50084 | "void *_Unwind_Find_FDE(const void *, struct dwarf_eh_bases *);\n" |
| 50085 | "\n" |
| 50086 | "void __register_frame_info_bases(const void *, void *, void *, void *)\n" |
| 50087 | " __attribute__((__unavailable__));\n" |
| 50088 | "void __register_frame_info(const void *, void *) __attribute__((__unavailable__));\n" |
| 50089 | "void __register_frame_info_table_bases(const void *, void*, void *, void *)\n" |
| 50090 | " __attribute__((__unavailable__));\n" |
| 50091 | "void __register_frame_info_table(const void *, void *)\n" |
| 50092 | " __attribute__((__unavailable__));\n" |
| 50093 | "void __register_frame_table(const void *) __attribute__((__unavailable__));\n" |
| 50094 | "void __deregister_frame_info(const void *) __attribute__((__unavailable__));\n" |
| 50095 | "void __deregister_frame_info_bases(const void *)__attribute__((__unavailable__));\n" |
| 50096 | "\n" |
| 50097 | "#else\n" |
| 50098 | "\n" |
| 50099 | "_Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *);\n" |
| 50100 | "_Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *);\n" |
| 50101 | "\n" |
| 50102 | "#endif\n" |
| 50103 | "\n" |
| 50104 | "\n" |
| 50105 | "#ifndef HIDE_EXPORTS\n" |
| 50106 | "#pragma GCC visibility pop\n" |
| 50107 | "#endif\n" |
| 50108 | "\n" |
| 50109 | "#ifdef __cplusplus\n" |
| 50110 | "}\n" |
| 50111 | "#endif\n" |
| 50112 | "\n" |
| 50113 | "#endif /* __CLANG_UNWIND_H */\n" |
| 50114 | "\n" |
| 50115 | "#endif\n" |
| 50116 | "\n" |
| 50117 | "" } , |
| 50118 | { "/builtins/vadefs.h" , "/* ===-------- vadefs.h ---------------------------------------------------===\n" |
| 50119 | " *\n" |
| 50120 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 50121 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 50122 | " * in the Software without restriction, including without limitation the rights\n" |
| 50123 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 50124 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 50125 | " * furnished to do so, subject to the following conditions:\n" |
| 50126 | " *\n" |
| 50127 | " * The above copyright notice and this permission notice shall be included in\n" |
| 50128 | " * all copies or substantial portions of the Software.\n" |
| 50129 | " *\n" |
| 50130 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 50131 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 50132 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 50133 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 50134 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 50135 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 50136 | " * THE SOFTWARE.\n" |
| 50137 | " *\n" |
| 50138 | " *===-----------------------------------------------------------------------===\n" |
| 50139 | " */\n" |
| 50140 | "\n" |
| 50141 | "/* Only include this if we are aiming for MSVC compatibility. */\n" |
| 50142 | "#ifndef _MSC_VER\n" |
| 50143 | "#include_next <vadefs.h>\n" |
| 50144 | "#else\n" |
| 50145 | "\n" |
| 50146 | "#ifndef __clang_vadefs_h\n" |
| 50147 | "#define __clang_vadefs_h\n" |
| 50148 | "\n" |
| 50149 | "#include_next <vadefs.h>\n" |
| 50150 | "\n" |
| 50151 | "/* Override macros from vadefs.h with definitions that work with Clang. */\n" |
| 50152 | "#ifdef _crt_va_start\n" |
| 50153 | "#undef _crt_va_start\n" |
| 50154 | "#define _crt_va_start(ap, param) __builtin_va_start(ap, param)\n" |
| 50155 | "#endif\n" |
| 50156 | "#ifdef _crt_va_end\n" |
| 50157 | "#undef _crt_va_end\n" |
| 50158 | "#define _crt_va_end(ap) __builtin_va_end(ap)\n" |
| 50159 | "#endif\n" |
| 50160 | "#ifdef _crt_va_arg\n" |
| 50161 | "#undef _crt_va_arg\n" |
| 50162 | "#define _crt_va_arg(ap, type) __builtin_va_arg(ap, type)\n" |
| 50163 | "#endif\n" |
| 50164 | "\n" |
| 50165 | "/* VS 2015 switched to double underscore names, which is an improvement, but now\n" |
| 50166 | " * we have to intercept those names too.\n" |
| 50167 | " */\n" |
| 50168 | "#ifdef __crt_va_start\n" |
| 50169 | "#undef __crt_va_start\n" |
| 50170 | "#define __crt_va_start(ap, param) __builtin_va_start(ap, param)\n" |
| 50171 | "#endif\n" |
| 50172 | "#ifdef __crt_va_end\n" |
| 50173 | "#undef __crt_va_end\n" |
| 50174 | "#define __crt_va_end(ap) __builtin_va_end(ap)\n" |
| 50175 | "#endif\n" |
| 50176 | "#ifdef __crt_va_arg\n" |
| 50177 | "#undef __crt_va_arg\n" |
| 50178 | "#define __crt_va_arg(ap, type) __builtin_va_arg(ap, type)\n" |
| 50179 | "#endif\n" |
| 50180 | "\n" |
| 50181 | "#endif\n" |
| 50182 | "#endif\n" |
| 50183 | "" } , |
| 50184 | { "/builtins/vaesintrin.h" , "/*===------------------ vaesintrin.h - VAES intrinsics ---------------------===\n" |
| 50185 | " *\n" |
| 50186 | " *\n" |
| 50187 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 50188 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 50189 | " * in the Software without restriction, including without limitation the rights\n" |
| 50190 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 50191 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 50192 | " * furnished to do so, subject to the following conditions:\n" |
| 50193 | " *\n" |
| 50194 | " * The above copyright notice and this permission notice shall be included in\n" |
| 50195 | " * all copies or substantial portions of the Software.\n" |
| 50196 | " *\n" |
| 50197 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 50198 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 50199 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 50200 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 50201 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 50202 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 50203 | " * THE SOFTWARE.\n" |
| 50204 | " *\n" |
| 50205 | " *===-----------------------------------------------------------------------===\n" |
| 50206 | " */\n" |
| 50207 | "#ifndef __IMMINTRIN_H\n" |
| 50208 | "#error \"Never use <vaesintrin.h> directly; include <immintrin.h> instead.\"\n" |
| 50209 | "#endif\n" |
| 50210 | "\n" |
| 50211 | "#ifndef __VAESINTRIN_H\n" |
| 50212 | "#define __VAESINTRIN_H\n" |
| 50213 | "\n" |
| 50214 | "/* Default attributes for YMM forms. */\n" |
| 50215 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"vaes\"), __min_vector_width__(256)))\n" |
| 50216 | "\n" |
| 50217 | "/* Default attributes for ZMM forms. */\n" |
| 50218 | "#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__(\"avx512f,vaes\"), __min_vector_width__(512)))\n" |
| 50219 | "\n" |
| 50220 | "\n" |
| 50221 | "static __inline__ __m256i __DEFAULT_FN_ATTRS\n" |
| 50222 | " _mm256_aesenc_epi128(__m256i __A, __m256i __B)\n" |
| 50223 | "{\n" |
| 50224 | " return (__m256i) __builtin_ia32_aesenc256((__v4di) __A,\n" |
| 50225 | " (__v4di) __B);\n" |
| 50226 | "}\n" |
| 50227 | "\n" |
| 50228 | "static __inline__ __m512i __DEFAULT_FN_ATTRS_F\n" |
| 50229 | " _mm512_aesenc_epi128(__m512i __A, __m512i __B)\n" |
| 50230 | "{\n" |
| 50231 | " return (__m512i) __builtin_ia32_aesenc512((__v8di) __A,\n" |
| 50232 | " (__v8di) __B);\n" |
| 50233 | "}\n" |
| 50234 | "\n" |
| 50235 | "static __inline__ __m256i __DEFAULT_FN_ATTRS\n" |
| 50236 | " _mm256_aesdec_epi128(__m256i __A, __m256i __B)\n" |
| 50237 | "{\n" |
| 50238 | " return (__m256i) __builtin_ia32_aesdec256((__v4di) __A,\n" |
| 50239 | " (__v4di) __B);\n" |
| 50240 | "}\n" |
| 50241 | "\n" |
| 50242 | "static __inline__ __m512i __DEFAULT_FN_ATTRS_F\n" |
| 50243 | " _mm512_aesdec_epi128(__m512i __A, __m512i __B)\n" |
| 50244 | "{\n" |
| 50245 | " return (__m512i) __builtin_ia32_aesdec512((__v8di) __A,\n" |
| 50246 | " (__v8di) __B);\n" |
| 50247 | "}\n" |
| 50248 | "\n" |
| 50249 | "static __inline__ __m256i __DEFAULT_FN_ATTRS\n" |
| 50250 | " _mm256_aesenclast_epi128(__m256i __A, __m256i __B)\n" |
| 50251 | "{\n" |
| 50252 | " return (__m256i) __builtin_ia32_aesenclast256((__v4di) __A,\n" |
| 50253 | " (__v4di) __B);\n" |
| 50254 | "}\n" |
| 50255 | "\n" |
| 50256 | "static __inline__ __m512i __DEFAULT_FN_ATTRS_F\n" |
| 50257 | " _mm512_aesenclast_epi128(__m512i __A, __m512i __B)\n" |
| 50258 | "{\n" |
| 50259 | " return (__m512i) __builtin_ia32_aesenclast512((__v8di) __A,\n" |
| 50260 | " (__v8di) __B);\n" |
| 50261 | "}\n" |
| 50262 | "\n" |
| 50263 | "static __inline__ __m256i __DEFAULT_FN_ATTRS\n" |
| 50264 | " _mm256_aesdeclast_epi128(__m256i __A, __m256i __B)\n" |
| 50265 | "{\n" |
| 50266 | " return (__m256i) __builtin_ia32_aesdeclast256((__v4di) __A,\n" |
| 50267 | " (__v4di) __B);\n" |
| 50268 | "}\n" |
| 50269 | "\n" |
| 50270 | "static __inline__ __m512i __DEFAULT_FN_ATTRS_F\n" |
| 50271 | " _mm512_aesdeclast_epi128(__m512i __A, __m512i __B)\n" |
| 50272 | "{\n" |
| 50273 | " return (__m512i) __builtin_ia32_aesdeclast512((__v8di) __A,\n" |
| 50274 | " (__v8di) __B);\n" |
| 50275 | "}\n" |
| 50276 | "\n" |
| 50277 | "\n" |
| 50278 | "#undef __DEFAULT_FN_ATTRS\n" |
| 50279 | "#undef __DEFAULT_FN_ATTRS_F\n" |
| 50280 | "\n" |
| 50281 | "#endif\n" |
| 50282 | "" } , |
| 50283 | { "/builtins/varargs.h" , "/*===---- varargs.h - Variable argument handling -------------------------------------===\n" |
| 50284 | "*\n" |
| 50285 | "* Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 50286 | "* of this software and associated documentation files (the \"Software\"), to deal\n" |
| 50287 | "* in the Software without restriction, including without limitation the rights\n" |
| 50288 | "* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 50289 | "* copies of the Software, and to permit persons to whom the Software is\n" |
| 50290 | "* furnished to do so, subject to the following conditions:\n" |
| 50291 | "*\n" |
| 50292 | "* The above copyright notice and this permission notice shall be included in\n" |
| 50293 | "* all copies or substantial portions of the Software.\n" |
| 50294 | "*\n" |
| 50295 | "* THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 50296 | "* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 50297 | "* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 50298 | "* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 50299 | "* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 50300 | "* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 50301 | "* THE SOFTWARE.\n" |
| 50302 | "*\n" |
| 50303 | "*===-----------------------------------------------------------------------===\n" |
| 50304 | "*/\n" |
| 50305 | "#ifndef __VARARGS_H\n" |
| 50306 | "#define __VARARGS_H\n" |
| 50307 | " #error \"Please use <stdarg.h> instead of <varargs.h>\"\n" |
| 50308 | "#endif\n" |
| 50309 | "" } , |
| 50310 | { "/builtins/vpclmulqdqintrin.h" , "/*===------------ vpclmulqdqintrin.h - VPCLMULQDQ intrinsics ---------------===\n" |
| 50311 | " *\n" |
| 50312 | " *\n" |
| 50313 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 50314 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 50315 | " * in the Software without restriction, including without limitation the rights\n" |
| 50316 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 50317 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 50318 | " * furnished to do so, subject to the following conditions:\n" |
| 50319 | " *\n" |
| 50320 | " * The above copyright notice and this permission notice shall be included in\n" |
| 50321 | " * all copies or substantial portions of the Software.\n" |
| 50322 | " *\n" |
| 50323 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 50324 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 50325 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 50326 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 50327 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 50328 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 50329 | " * THE SOFTWARE.\n" |
| 50330 | " *\n" |
| 50331 | " *===-----------------------------------------------------------------------===\n" |
| 50332 | " */\n" |
| 50333 | "#ifndef __IMMINTRIN_H\n" |
| 50334 | "#error \"Never use <vpclmulqdqintrin.h> directly; include <immintrin.h> instead.\"\n" |
| 50335 | "#endif\n" |
| 50336 | "\n" |
| 50337 | "#ifndef __VPCLMULQDQINTRIN_H\n" |
| 50338 | "#define __VPCLMULQDQINTRIN_H\n" |
| 50339 | "\n" |
| 50340 | "#define _mm256_clmulepi64_epi128(A, B, I) \\\n" |
| 50341 | " (__m256i)__builtin_ia32_pclmulqdq256((__v4di)(__m256i)(A), \\\n" |
| 50342 | " (__v4di)(__m256i)(B), \\\n" |
| 50343 | " (char)(I))\n" |
| 50344 | "\n" |
| 50345 | "#define _mm512_clmulepi64_epi128(A, B, I) \\\n" |
| 50346 | " (__m512i)__builtin_ia32_pclmulqdq512((__v8di)(__m512i)(A), \\\n" |
| 50347 | " (__v8di)(__m512i)(B), \\\n" |
| 50348 | " (char)(I))\n" |
| 50349 | "\n" |
| 50350 | "#endif /* __VPCLMULQDQINTRIN_H */\n" |
| 50351 | "\n" |
| 50352 | "" } , |
| 50353 | { "/builtins/waitpkgintrin.h" , "/*===----------------------- waitpkgintrin.h - WAITPKG --------------------===\n" |
| 50354 | " *\n" |
| 50355 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 50356 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 50357 | " * in the Software without restriction, including without limitation the rights\n" |
| 50358 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 50359 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 50360 | " * furnished to do so, subject to the following conditions:\n" |
| 50361 | " *\n" |
| 50362 | " * The above copyright notice and this permission notice shall be included in\n" |
| 50363 | " * all copies or substantial portions of the Software.\n" |
| 50364 | " *\n" |
| 50365 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 50366 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 50367 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 50368 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 50369 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 50370 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 50371 | " * THE SOFTWARE.\n" |
| 50372 | " *\n" |
| 50373 | " *===-----------------------------------------------------------------------===\n" |
| 50374 | " */\n" |
| 50375 | "#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n" |
| 50376 | "#error \"Never use <waitpkgintrin.h> directly; include <x86intrin.h> instead.\"\n" |
| 50377 | "#endif\n" |
| 50378 | "\n" |
| 50379 | "#ifndef __WAITPKGINTRIN_H\n" |
| 50380 | "#define __WAITPKGINTRIN_H\n" |
| 50381 | "\n" |
| 50382 | "/* Define the default attributes for the functions in this file. */\n" |
| 50383 | "#define __DEFAULT_FN_ATTRS \\\n" |
| 50384 | " __attribute__((__always_inline__, __nodebug__, __target__(\"waitpkg\")))\n" |
| 50385 | "\n" |
| 50386 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 50387 | "_umonitor (void * __address)\n" |
| 50388 | "{\n" |
| 50389 | " __builtin_ia32_umonitor (__address);\n" |
| 50390 | "}\n" |
| 50391 | "\n" |
| 50392 | "static __inline__ unsigned char __DEFAULT_FN_ATTRS\n" |
| 50393 | "_umwait (unsigned int __control, unsigned long long __counter)\n" |
| 50394 | "{\n" |
| 50395 | " return __builtin_ia32_umwait (__control,\n" |
| 50396 | " (unsigned int)(__counter >> 32), (unsigned int)__counter);\n" |
| 50397 | "}\n" |
| 50398 | "\n" |
| 50399 | "static __inline__ unsigned char __DEFAULT_FN_ATTRS\n" |
| 50400 | "_tpause (unsigned int __control, unsigned long long __counter)\n" |
| 50401 | "{\n" |
| 50402 | " return __builtin_ia32_tpause (__control,\n" |
| 50403 | " (unsigned int)(__counter >> 32), (unsigned int)__counter);\n" |
| 50404 | "}\n" |
| 50405 | "\n" |
| 50406 | "#undef __DEFAULT_FN_ATTRS\n" |
| 50407 | "\n" |
| 50408 | "#endif /* __WAITPKGINTRIN_H */\n" |
| 50409 | "" } , |
| 50410 | { "/builtins/wbnoinvdintrin.h" , "/*===-------------- wbnoinvdintrin.h - wbnoinvd intrinsic-------------------===\n" |
| 50411 | " *\n" |
| 50412 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 50413 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 50414 | " * in the Software without restriction, including without limitation the rights\n" |
| 50415 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 50416 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 50417 | " * furnished to do so, subject to the following conditions:\n" |
| 50418 | " *\n" |
| 50419 | " * The above copyright notice and this permission notice shall be included in\n" |
| 50420 | " * all copies or substantial portions of the Software.\n" |
| 50421 | " *\n" |
| 50422 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 50423 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 50424 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 50425 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 50426 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 50427 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 50428 | " * THE SOFTWARE.\n" |
| 50429 | " *\n" |
| 50430 | " *===-----------------------------------------------------------------------===\n" |
| 50431 | " */\n" |
| 50432 | "\n" |
| 50433 | "#if !defined __X86INTRIN_H && !defined __IMMINTRIN_H\n" |
| 50434 | "#error \"Never use <wbnoinvdintrin.h> directly; include <x86intrin.h> instead.\"\n" |
| 50435 | "#endif\n" |
| 50436 | "\n" |
| 50437 | "#ifndef __WBNOINVDINTRIN_H\n" |
| 50438 | "#define __WBNOINVDINTRIN_H\n" |
| 50439 | "\n" |
| 50440 | "static __inline__ void\n" |
| 50441 | " __attribute__((__always_inline__, __nodebug__, __target__(\"wbnoinvd\")))\n" |
| 50442 | "_wbnoinvd (void)\n" |
| 50443 | "{\n" |
| 50444 | " __builtin_ia32_wbnoinvd ();\n" |
| 50445 | "}\n" |
| 50446 | "\n" |
| 50447 | "#endif /* __WBNOINVDINTRIN_H */\n" |
| 50448 | "" } , |
| 50449 | { "/builtins/wmmintrin.h" , "/*===---- wmmintrin.h - AES intrinsics ------------------------------------===\n" |
| 50450 | " *\n" |
| 50451 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 50452 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 50453 | " * in the Software without restriction, including without limitation the rights\n" |
| 50454 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 50455 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 50456 | " * furnished to do so, subject to the following conditions:\n" |
| 50457 | " *\n" |
| 50458 | " * The above copyright notice and this permission notice shall be included in\n" |
| 50459 | " * all copies or substantial portions of the Software.\n" |
| 50460 | " *\n" |
| 50461 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 50462 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 50463 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 50464 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 50465 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 50466 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 50467 | " * THE SOFTWARE.\n" |
| 50468 | " *\n" |
| 50469 | " *===-----------------------------------------------------------------------===\n" |
| 50470 | " */\n" |
| 50471 | "\n" |
| 50472 | "#ifndef __WMMINTRIN_H\n" |
| 50473 | "#define __WMMINTRIN_H\n" |
| 50474 | "\n" |
| 50475 | "#include <emmintrin.h>\n" |
| 50476 | "\n" |
| 50477 | "#include <__wmmintrin_aes.h>\n" |
| 50478 | "\n" |
| 50479 | "#include <__wmmintrin_pclmul.h>\n" |
| 50480 | "\n" |
| 50481 | "#endif /* __WMMINTRIN_H */\n" |
| 50482 | "" } , |
| 50483 | { "/builtins/x86intrin.h" , "/*===---- x86intrin.h - X86 intrinsics -------------------------------------===\n" |
| 50484 | " *\n" |
| 50485 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 50486 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 50487 | " * in the Software without restriction, including without limitation the rights\n" |
| 50488 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 50489 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 50490 | " * furnished to do so, subject to the following conditions:\n" |
| 50491 | " *\n" |
| 50492 | " * The above copyright notice and this permission notice shall be included in\n" |
| 50493 | " * all copies or substantial portions of the Software.\n" |
| 50494 | " *\n" |
| 50495 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 50496 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 50497 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 50498 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 50499 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 50500 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 50501 | " * THE SOFTWARE.\n" |
| 50502 | " *\n" |
| 50503 | " *===-----------------------------------------------------------------------===\n" |
| 50504 | " */\n" |
| 50505 | "\n" |
| 50506 | "#ifndef __X86INTRIN_H\n" |
| 50507 | "#define __X86INTRIN_H\n" |
| 50508 | "\n" |
| 50509 | "#include <ia32intrin.h>\n" |
| 50510 | "\n" |
| 50511 | "#include <immintrin.h>\n" |
| 50512 | "\n" |
| 50513 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__3dNOW__)\n" |
| 50514 | "#include <mm3dnow.h>\n" |
| 50515 | "#endif\n" |
| 50516 | "\n" |
| 50517 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PRFCHW__)\n" |
| 50518 | "#include <prfchwintrin.h>\n" |
| 50519 | "#endif\n" |
| 50520 | "\n" |
| 50521 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE4A__)\n" |
| 50522 | "#include <ammintrin.h>\n" |
| 50523 | "#endif\n" |
| 50524 | "\n" |
| 50525 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA4__)\n" |
| 50526 | "#include <fma4intrin.h>\n" |
| 50527 | "#endif\n" |
| 50528 | "\n" |
| 50529 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XOP__)\n" |
| 50530 | "#include <xopintrin.h>\n" |
| 50531 | "#endif\n" |
| 50532 | "\n" |
| 50533 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__TBM__)\n" |
| 50534 | "#include <tbmintrin.h>\n" |
| 50535 | "#endif\n" |
| 50536 | "\n" |
| 50537 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LWP__)\n" |
| 50538 | "#include <lwpintrin.h>\n" |
| 50539 | "#endif\n" |
| 50540 | "\n" |
| 50541 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MWAITX__)\n" |
| 50542 | "#include <mwaitxintrin.h>\n" |
| 50543 | "#endif\n" |
| 50544 | "\n" |
| 50545 | "#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLZERO__)\n" |
| 50546 | "#include <clzerointrin.h>\n" |
| 50547 | "#endif\n" |
| 50548 | "\n" |
| 50549 | "\n" |
| 50550 | "#endif /* __X86INTRIN_H */\n" |
| 50551 | "" } , |
| 50552 | { "/builtins/xmmintrin.h" , "/*===---- xmmintrin.h - SSE intrinsics -------------------------------------===\n" |
| 50553 | " *\n" |
| 50554 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 50555 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 50556 | " * in the Software without restriction, including without limitation the rights\n" |
| 50557 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 50558 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 50559 | " * furnished to do so, subject to the following conditions:\n" |
| 50560 | " *\n" |
| 50561 | " * The above copyright notice and this permission notice shall be included in\n" |
| 50562 | " * all copies or substantial portions of the Software.\n" |
| 50563 | " *\n" |
| 50564 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 50565 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 50566 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 50567 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 50568 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 50569 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 50570 | " * THE SOFTWARE.\n" |
| 50571 | " *\n" |
| 50572 | " *===-----------------------------------------------------------------------===\n" |
| 50573 | " */\n" |
| 50574 | "\n" |
| 50575 | "#ifndef __XMMINTRIN_H\n" |
| 50576 | "#define __XMMINTRIN_H\n" |
| 50577 | "\n" |
| 50578 | "#include <mmintrin.h>\n" |
| 50579 | "\n" |
| 50580 | "typedef int __v4si __attribute__((__vector_size__(16)));\n" |
| 50581 | "typedef float __v4sf __attribute__((__vector_size__(16)));\n" |
| 50582 | "typedef float __m128 __attribute__((__vector_size__(16)));\n" |
| 50583 | "\n" |
| 50584 | "/* Unsigned types */\n" |
| 50585 | "typedef unsigned int __v4su __attribute__((__vector_size__(16)));\n" |
| 50586 | "\n" |
| 50587 | "/* This header should only be included in a hosted environment as it depends on\n" |
| 50588 | " * a standard library to provide allocation routines. */\n" |
| 50589 | "#if __STDC_HOSTED__\n" |
| 50590 | "#include <mm_malloc.h>\n" |
| 50591 | "#endif\n" |
| 50592 | "\n" |
| 50593 | "/* Define the default attributes for the functions in this file. */\n" |
| 50594 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"sse\"), __min_vector_width__(128)))\n" |
| 50595 | "#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__(\"mmx,sse\"), __min_vector_width__(64)))\n" |
| 50596 | "\n" |
| 50597 | "/// Adds the 32-bit float values in the low-order bits of the operands.\n" |
| 50598 | "///\n" |
| 50599 | "/// \\headerfile <x86intrin.h>\n" |
| 50600 | "///\n" |
| 50601 | "/// This intrinsic corresponds to the <c> VADDSS / ADDSS </c> instructions.\n" |
| 50602 | "///\n" |
| 50603 | "/// \\param __a\n" |
| 50604 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
| 50605 | "/// The lower 32 bits of this operand are used in the calculation.\n" |
| 50606 | "/// \\param __b\n" |
| 50607 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
| 50608 | "/// The lower 32 bits of this operand are used in the calculation.\n" |
| 50609 | "/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the sum\n" |
| 50610 | "/// of the lower 32 bits of both operands. The upper 96 bits are copied from\n" |
| 50611 | "/// the upper 96 bits of the first source operand.\n" |
| 50612 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 50613 | "_mm_add_ss(__m128 __a, __m128 __b)\n" |
| 50614 | "{\n" |
| 50615 | " __a[0] += __b[0];\n" |
| 50616 | " return __a;\n" |
| 50617 | "}\n" |
| 50618 | "\n" |
| 50619 | "/// Adds two 128-bit vectors of [4 x float], and returns the results of\n" |
| 50620 | "/// the addition.\n" |
| 50621 | "///\n" |
| 50622 | "/// \\headerfile <x86intrin.h>\n" |
| 50623 | "///\n" |
| 50624 | "/// This intrinsic corresponds to the <c> VADDPS / ADDPS </c> instructions.\n" |
| 50625 | "///\n" |
| 50626 | "/// \\param __a\n" |
| 50627 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
| 50628 | "/// \\param __b\n" |
| 50629 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
| 50630 | "/// \\returns A 128-bit vector of [4 x float] containing the sums of both\n" |
| 50631 | "/// operands.\n" |
| 50632 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 50633 | "_mm_add_ps(__m128 __a, __m128 __b)\n" |
| 50634 | "{\n" |
| 50635 | " return (__m128)((__v4sf)__a + (__v4sf)__b);\n" |
| 50636 | "}\n" |
| 50637 | "\n" |
| 50638 | "/// Subtracts the 32-bit float value in the low-order bits of the second\n" |
| 50639 | "/// operand from the corresponding value in the first operand.\n" |
| 50640 | "///\n" |
| 50641 | "/// \\headerfile <x86intrin.h>\n" |
| 50642 | "///\n" |
| 50643 | "/// This intrinsic corresponds to the <c> VSUBSS / SUBSS </c> instructions.\n" |
| 50644 | "///\n" |
| 50645 | "/// \\param __a\n" |
| 50646 | "/// A 128-bit vector of [4 x float] containing the minuend. The lower 32 bits\n" |
| 50647 | "/// of this operand are used in the calculation.\n" |
| 50648 | "/// \\param __b\n" |
| 50649 | "/// A 128-bit vector of [4 x float] containing the subtrahend. The lower 32\n" |
| 50650 | "/// bits of this operand are used in the calculation.\n" |
| 50651 | "/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the\n" |
| 50652 | "/// difference of the lower 32 bits of both operands. The upper 96 bits are\n" |
| 50653 | "/// copied from the upper 96 bits of the first source operand.\n" |
| 50654 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 50655 | "_mm_sub_ss(__m128 __a, __m128 __b)\n" |
| 50656 | "{\n" |
| 50657 | " __a[0] -= __b[0];\n" |
| 50658 | " return __a;\n" |
| 50659 | "}\n" |
| 50660 | "\n" |
| 50661 | "/// Subtracts each of the values of the second operand from the first\n" |
| 50662 | "/// operand, both of which are 128-bit vectors of [4 x float] and returns\n" |
| 50663 | "/// the results of the subtraction.\n" |
| 50664 | "///\n" |
| 50665 | "/// \\headerfile <x86intrin.h>\n" |
| 50666 | "///\n" |
| 50667 | "/// This intrinsic corresponds to the <c> VSUBPS / SUBPS </c> instructions.\n" |
| 50668 | "///\n" |
| 50669 | "/// \\param __a\n" |
| 50670 | "/// A 128-bit vector of [4 x float] containing the minuend.\n" |
| 50671 | "/// \\param __b\n" |
| 50672 | "/// A 128-bit vector of [4 x float] containing the subtrahend.\n" |
| 50673 | "/// \\returns A 128-bit vector of [4 x float] containing the differences between\n" |
| 50674 | "/// both operands.\n" |
| 50675 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 50676 | "_mm_sub_ps(__m128 __a, __m128 __b)\n" |
| 50677 | "{\n" |
| 50678 | " return (__m128)((__v4sf)__a - (__v4sf)__b);\n" |
| 50679 | "}\n" |
| 50680 | "\n" |
| 50681 | "/// Multiplies two 32-bit float values in the low-order bits of the\n" |
| 50682 | "/// operands.\n" |
| 50683 | "///\n" |
| 50684 | "/// \\headerfile <x86intrin.h>\n" |
| 50685 | "///\n" |
| 50686 | "/// This intrinsic corresponds to the <c> VMULSS / MULSS </c> instructions.\n" |
| 50687 | "///\n" |
| 50688 | "/// \\param __a\n" |
| 50689 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
| 50690 | "/// The lower 32 bits of this operand are used in the calculation.\n" |
| 50691 | "/// \\param __b\n" |
| 50692 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
| 50693 | "/// The lower 32 bits of this operand are used in the calculation.\n" |
| 50694 | "/// \\returns A 128-bit vector of [4 x float] containing the product of the lower\n" |
| 50695 | "/// 32 bits of both operands. The upper 96 bits are copied from the upper 96\n" |
| 50696 | "/// bits of the first source operand.\n" |
| 50697 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 50698 | "_mm_mul_ss(__m128 __a, __m128 __b)\n" |
| 50699 | "{\n" |
| 50700 | " __a[0] *= __b[0];\n" |
| 50701 | " return __a;\n" |
| 50702 | "}\n" |
| 50703 | "\n" |
| 50704 | "/// Multiplies two 128-bit vectors of [4 x float] and returns the\n" |
| 50705 | "/// results of the multiplication.\n" |
| 50706 | "///\n" |
| 50707 | "/// \\headerfile <x86intrin.h>\n" |
| 50708 | "///\n" |
| 50709 | "/// This intrinsic corresponds to the <c> VMULPS / MULPS </c> instructions.\n" |
| 50710 | "///\n" |
| 50711 | "/// \\param __a\n" |
| 50712 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
| 50713 | "/// \\param __b\n" |
| 50714 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
| 50715 | "/// \\returns A 128-bit vector of [4 x float] containing the products of both\n" |
| 50716 | "/// operands.\n" |
| 50717 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 50718 | "_mm_mul_ps(__m128 __a, __m128 __b)\n" |
| 50719 | "{\n" |
| 50720 | " return (__m128)((__v4sf)__a * (__v4sf)__b);\n" |
| 50721 | "}\n" |
| 50722 | "\n" |
| 50723 | "/// Divides the value in the low-order 32 bits of the first operand by\n" |
| 50724 | "/// the corresponding value in the second operand.\n" |
| 50725 | "///\n" |
| 50726 | "/// \\headerfile <x86intrin.h>\n" |
| 50727 | "///\n" |
| 50728 | "/// This intrinsic corresponds to the <c> VDIVSS / DIVSS </c> instructions.\n" |
| 50729 | "///\n" |
| 50730 | "/// \\param __a\n" |
| 50731 | "/// A 128-bit vector of [4 x float] containing the dividend. The lower 32\n" |
| 50732 | "/// bits of this operand are used in the calculation.\n" |
| 50733 | "/// \\param __b\n" |
| 50734 | "/// A 128-bit vector of [4 x float] containing the divisor. The lower 32 bits\n" |
| 50735 | "/// of this operand are used in the calculation.\n" |
| 50736 | "/// \\returns A 128-bit vector of [4 x float] containing the quotients of the\n" |
| 50737 | "/// lower 32 bits of both operands. The upper 96 bits are copied from the\n" |
| 50738 | "/// upper 96 bits of the first source operand.\n" |
| 50739 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 50740 | "_mm_div_ss(__m128 __a, __m128 __b)\n" |
| 50741 | "{\n" |
| 50742 | " __a[0] /= __b[0];\n" |
| 50743 | " return __a;\n" |
| 50744 | "}\n" |
| 50745 | "\n" |
| 50746 | "/// Divides two 128-bit vectors of [4 x float].\n" |
| 50747 | "///\n" |
| 50748 | "/// \\headerfile <x86intrin.h>\n" |
| 50749 | "///\n" |
| 50750 | "/// This intrinsic corresponds to the <c> VDIVPS / DIVPS </c> instructions.\n" |
| 50751 | "///\n" |
| 50752 | "/// \\param __a\n" |
| 50753 | "/// A 128-bit vector of [4 x float] containing the dividend.\n" |
| 50754 | "/// \\param __b\n" |
| 50755 | "/// A 128-bit vector of [4 x float] containing the divisor.\n" |
| 50756 | "/// \\returns A 128-bit vector of [4 x float] containing the quotients of both\n" |
| 50757 | "/// operands.\n" |
| 50758 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 50759 | "_mm_div_ps(__m128 __a, __m128 __b)\n" |
| 50760 | "{\n" |
| 50761 | " return (__m128)((__v4sf)__a / (__v4sf)__b);\n" |
| 50762 | "}\n" |
| 50763 | "\n" |
| 50764 | "/// Calculates the square root of the value stored in the low-order bits\n" |
| 50765 | "/// of a 128-bit vector of [4 x float].\n" |
| 50766 | "///\n" |
| 50767 | "/// \\headerfile <x86intrin.h>\n" |
| 50768 | "///\n" |
| 50769 | "/// This intrinsic corresponds to the <c> VSQRTSS / SQRTSS </c> instructions.\n" |
| 50770 | "///\n" |
| 50771 | "/// \\param __a\n" |
| 50772 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 50773 | "/// used in the calculation.\n" |
| 50774 | "/// \\returns A 128-bit vector of [4 x float] containing the square root of the\n" |
| 50775 | "/// value in the low-order bits of the operand.\n" |
| 50776 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 50777 | "_mm_sqrt_ss(__m128 __a)\n" |
| 50778 | "{\n" |
| 50779 | " return (__m128)__builtin_ia32_sqrtss((__v4sf)__a);\n" |
| 50780 | "}\n" |
| 50781 | "\n" |
| 50782 | "/// Calculates the square roots of the values stored in a 128-bit vector\n" |
| 50783 | "/// of [4 x float].\n" |
| 50784 | "///\n" |
| 50785 | "/// \\headerfile <x86intrin.h>\n" |
| 50786 | "///\n" |
| 50787 | "/// This intrinsic corresponds to the <c> VSQRTPS / SQRTPS </c> instructions.\n" |
| 50788 | "///\n" |
| 50789 | "/// \\param __a\n" |
| 50790 | "/// A 128-bit vector of [4 x float].\n" |
| 50791 | "/// \\returns A 128-bit vector of [4 x float] containing the square roots of the\n" |
| 50792 | "/// values in the operand.\n" |
| 50793 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 50794 | "_mm_sqrt_ps(__m128 __a)\n" |
| 50795 | "{\n" |
| 50796 | " return __builtin_ia32_sqrtps((__v4sf)__a);\n" |
| 50797 | "}\n" |
| 50798 | "\n" |
| 50799 | "/// Calculates the approximate reciprocal of the value stored in the\n" |
| 50800 | "/// low-order bits of a 128-bit vector of [4 x float].\n" |
| 50801 | "///\n" |
| 50802 | "/// \\headerfile <x86intrin.h>\n" |
| 50803 | "///\n" |
| 50804 | "/// This intrinsic corresponds to the <c> VRCPSS / RCPSS </c> instructions.\n" |
| 50805 | "///\n" |
| 50806 | "/// \\param __a\n" |
| 50807 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 50808 | "/// used in the calculation.\n" |
| 50809 | "/// \\returns A 128-bit vector of [4 x float] containing the approximate\n" |
| 50810 | "/// reciprocal of the value in the low-order bits of the operand.\n" |
| 50811 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 50812 | "_mm_rcp_ss(__m128 __a)\n" |
| 50813 | "{\n" |
| 50814 | " return (__m128)__builtin_ia32_rcpss((__v4sf)__a);\n" |
| 50815 | "}\n" |
| 50816 | "\n" |
| 50817 | "/// Calculates the approximate reciprocals of the values stored in a\n" |
| 50818 | "/// 128-bit vector of [4 x float].\n" |
| 50819 | "///\n" |
| 50820 | "/// \\headerfile <x86intrin.h>\n" |
| 50821 | "///\n" |
| 50822 | "/// This intrinsic corresponds to the <c> VRCPPS / RCPPS </c> instructions.\n" |
| 50823 | "///\n" |
| 50824 | "/// \\param __a\n" |
| 50825 | "/// A 128-bit vector of [4 x float].\n" |
| 50826 | "/// \\returns A 128-bit vector of [4 x float] containing the approximate\n" |
| 50827 | "/// reciprocals of the values in the operand.\n" |
| 50828 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 50829 | "_mm_rcp_ps(__m128 __a)\n" |
| 50830 | "{\n" |
| 50831 | " return (__m128)__builtin_ia32_rcpps((__v4sf)__a);\n" |
| 50832 | "}\n" |
| 50833 | "\n" |
| 50834 | "/// Calculates the approximate reciprocal of the square root of the value\n" |
| 50835 | "/// stored in the low-order bits of a 128-bit vector of [4 x float].\n" |
| 50836 | "///\n" |
| 50837 | "/// \\headerfile <x86intrin.h>\n" |
| 50838 | "///\n" |
| 50839 | "/// This intrinsic corresponds to the <c> VRSQRTSS / RSQRTSS </c> instructions.\n" |
| 50840 | "///\n" |
| 50841 | "/// \\param __a\n" |
| 50842 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 50843 | "/// used in the calculation.\n" |
| 50844 | "/// \\returns A 128-bit vector of [4 x float] containing the approximate\n" |
| 50845 | "/// reciprocal of the square root of the value in the low-order bits of the\n" |
| 50846 | "/// operand.\n" |
| 50847 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 50848 | "_mm_rsqrt_ss(__m128 __a)\n" |
| 50849 | "{\n" |
| 50850 | " return __builtin_ia32_rsqrtss((__v4sf)__a);\n" |
| 50851 | "}\n" |
| 50852 | "\n" |
| 50853 | "/// Calculates the approximate reciprocals of the square roots of the\n" |
| 50854 | "/// values stored in a 128-bit vector of [4 x float].\n" |
| 50855 | "///\n" |
| 50856 | "/// \\headerfile <x86intrin.h>\n" |
| 50857 | "///\n" |
| 50858 | "/// This intrinsic corresponds to the <c> VRSQRTPS / RSQRTPS </c> instructions.\n" |
| 50859 | "///\n" |
| 50860 | "/// \\param __a\n" |
| 50861 | "/// A 128-bit vector of [4 x float].\n" |
| 50862 | "/// \\returns A 128-bit vector of [4 x float] containing the approximate\n" |
| 50863 | "/// reciprocals of the square roots of the values in the operand.\n" |
| 50864 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 50865 | "_mm_rsqrt_ps(__m128 __a)\n" |
| 50866 | "{\n" |
| 50867 | " return __builtin_ia32_rsqrtps((__v4sf)__a);\n" |
| 50868 | "}\n" |
| 50869 | "\n" |
| 50870 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
| 50871 | "/// operands and returns the lesser value in the low-order bits of the\n" |
| 50872 | "/// vector of [4 x float].\n" |
| 50873 | "///\n" |
| 50874 | "/// \\headerfile <x86intrin.h>\n" |
| 50875 | "///\n" |
| 50876 | "/// This intrinsic corresponds to the <c> VMINSS / MINSS </c> instructions.\n" |
| 50877 | "///\n" |
| 50878 | "/// \\param __a\n" |
| 50879 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
| 50880 | "/// 32 bits of this operand are used in the comparison.\n" |
| 50881 | "/// \\param __b\n" |
| 50882 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
| 50883 | "/// 32 bits of this operand are used in the comparison.\n" |
| 50884 | "/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the\n" |
| 50885 | "/// minimum value between both operands. The upper 96 bits are copied from\n" |
| 50886 | "/// the upper 96 bits of the first source operand.\n" |
| 50887 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 50888 | "_mm_min_ss(__m128 __a, __m128 __b)\n" |
| 50889 | "{\n" |
| 50890 | " return __builtin_ia32_minss((__v4sf)__a, (__v4sf)__b);\n" |
| 50891 | "}\n" |
| 50892 | "\n" |
| 50893 | "/// Compares two 128-bit vectors of [4 x float] and returns the lesser\n" |
| 50894 | "/// of each pair of values.\n" |
| 50895 | "///\n" |
| 50896 | "/// \\headerfile <x86intrin.h>\n" |
| 50897 | "///\n" |
| 50898 | "/// This intrinsic corresponds to the <c> VMINPS / MINPS </c> instructions.\n" |
| 50899 | "///\n" |
| 50900 | "/// \\param __a\n" |
| 50901 | "/// A 128-bit vector of [4 x float] containing one of the operands.\n" |
| 50902 | "/// \\param __b\n" |
| 50903 | "/// A 128-bit vector of [4 x float] containing one of the operands.\n" |
| 50904 | "/// \\returns A 128-bit vector of [4 x float] containing the minimum values\n" |
| 50905 | "/// between both operands.\n" |
| 50906 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 50907 | "_mm_min_ps(__m128 __a, __m128 __b)\n" |
| 50908 | "{\n" |
| 50909 | " return __builtin_ia32_minps((__v4sf)__a, (__v4sf)__b);\n" |
| 50910 | "}\n" |
| 50911 | "\n" |
| 50912 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
| 50913 | "/// operands and returns the greater value in the low-order bits of a 128-bit\n" |
| 50914 | "/// vector of [4 x float].\n" |
| 50915 | "///\n" |
| 50916 | "/// \\headerfile <x86intrin.h>\n" |
| 50917 | "///\n" |
| 50918 | "/// This intrinsic corresponds to the <c> VMAXSS / MAXSS </c> instructions.\n" |
| 50919 | "///\n" |
| 50920 | "/// \\param __a\n" |
| 50921 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
| 50922 | "/// 32 bits of this operand are used in the comparison.\n" |
| 50923 | "/// \\param __b\n" |
| 50924 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
| 50925 | "/// 32 bits of this operand are used in the comparison.\n" |
| 50926 | "/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the\n" |
| 50927 | "/// maximum value between both operands. The upper 96 bits are copied from\n" |
| 50928 | "/// the upper 96 bits of the first source operand.\n" |
| 50929 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 50930 | "_mm_max_ss(__m128 __a, __m128 __b)\n" |
| 50931 | "{\n" |
| 50932 | " return __builtin_ia32_maxss((__v4sf)__a, (__v4sf)__b);\n" |
| 50933 | "}\n" |
| 50934 | "\n" |
| 50935 | "/// Compares two 128-bit vectors of [4 x float] and returns the greater\n" |
| 50936 | "/// of each pair of values.\n" |
| 50937 | "///\n" |
| 50938 | "/// \\headerfile <x86intrin.h>\n" |
| 50939 | "///\n" |
| 50940 | "/// This intrinsic corresponds to the <c> VMAXPS / MAXPS </c> instructions.\n" |
| 50941 | "///\n" |
| 50942 | "/// \\param __a\n" |
| 50943 | "/// A 128-bit vector of [4 x float] containing one of the operands.\n" |
| 50944 | "/// \\param __b\n" |
| 50945 | "/// A 128-bit vector of [4 x float] containing one of the operands.\n" |
| 50946 | "/// \\returns A 128-bit vector of [4 x float] containing the maximum values\n" |
| 50947 | "/// between both operands.\n" |
| 50948 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 50949 | "_mm_max_ps(__m128 __a, __m128 __b)\n" |
| 50950 | "{\n" |
| 50951 | " return __builtin_ia32_maxps((__v4sf)__a, (__v4sf)__b);\n" |
| 50952 | "}\n" |
| 50953 | "\n" |
| 50954 | "/// Performs a bitwise AND of two 128-bit vectors of [4 x float].\n" |
| 50955 | "///\n" |
| 50956 | "/// \\headerfile <x86intrin.h>\n" |
| 50957 | "///\n" |
| 50958 | "/// This intrinsic corresponds to the <c> VANDPS / ANDPS </c> instructions.\n" |
| 50959 | "///\n" |
| 50960 | "/// \\param __a\n" |
| 50961 | "/// A 128-bit vector containing one of the source operands.\n" |
| 50962 | "/// \\param __b\n" |
| 50963 | "/// A 128-bit vector containing one of the source operands.\n" |
| 50964 | "/// \\returns A 128-bit vector of [4 x float] containing the bitwise AND of the\n" |
| 50965 | "/// values between both operands.\n" |
| 50966 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 50967 | "_mm_and_ps(__m128 __a, __m128 __b)\n" |
| 50968 | "{\n" |
| 50969 | " return (__m128)((__v4su)__a & (__v4su)__b);\n" |
| 50970 | "}\n" |
| 50971 | "\n" |
| 50972 | "/// Performs a bitwise AND of two 128-bit vectors of [4 x float], using\n" |
| 50973 | "/// the one's complement of the values contained in the first source\n" |
| 50974 | "/// operand.\n" |
| 50975 | "///\n" |
| 50976 | "/// \\headerfile <x86intrin.h>\n" |
| 50977 | "///\n" |
| 50978 | "/// This intrinsic corresponds to the <c> VANDNPS / ANDNPS </c> instructions.\n" |
| 50979 | "///\n" |
| 50980 | "/// \\param __a\n" |
| 50981 | "/// A 128-bit vector of [4 x float] containing the first source operand. The\n" |
| 50982 | "/// one's complement of this value is used in the bitwise AND.\n" |
| 50983 | "/// \\param __b\n" |
| 50984 | "/// A 128-bit vector of [4 x float] containing the second source operand.\n" |
| 50985 | "/// \\returns A 128-bit vector of [4 x float] containing the bitwise AND of the\n" |
| 50986 | "/// one's complement of the first operand and the values in the second\n" |
| 50987 | "/// operand.\n" |
| 50988 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 50989 | "_mm_andnot_ps(__m128 __a, __m128 __b)\n" |
| 50990 | "{\n" |
| 50991 | " return (__m128)(~(__v4su)__a & (__v4su)__b);\n" |
| 50992 | "}\n" |
| 50993 | "\n" |
| 50994 | "/// Performs a bitwise OR of two 128-bit vectors of [4 x float].\n" |
| 50995 | "///\n" |
| 50996 | "/// \\headerfile <x86intrin.h>\n" |
| 50997 | "///\n" |
| 50998 | "/// This intrinsic corresponds to the <c> VORPS / ORPS </c> instructions.\n" |
| 50999 | "///\n" |
| 51000 | "/// \\param __a\n" |
| 51001 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
| 51002 | "/// \\param __b\n" |
| 51003 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
| 51004 | "/// \\returns A 128-bit vector of [4 x float] containing the bitwise OR of the\n" |
| 51005 | "/// values between both operands.\n" |
| 51006 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 51007 | "_mm_or_ps(__m128 __a, __m128 __b)\n" |
| 51008 | "{\n" |
| 51009 | " return (__m128)((__v4su)__a | (__v4su)__b);\n" |
| 51010 | "}\n" |
| 51011 | "\n" |
| 51012 | "/// Performs a bitwise exclusive OR of two 128-bit vectors of\n" |
| 51013 | "/// [4 x float].\n" |
| 51014 | "///\n" |
| 51015 | "/// \\headerfile <x86intrin.h>\n" |
| 51016 | "///\n" |
| 51017 | "/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instructions.\n" |
| 51018 | "///\n" |
| 51019 | "/// \\param __a\n" |
| 51020 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
| 51021 | "/// \\param __b\n" |
| 51022 | "/// A 128-bit vector of [4 x float] containing one of the source operands.\n" |
| 51023 | "/// \\returns A 128-bit vector of [4 x float] containing the bitwise exclusive OR\n" |
| 51024 | "/// of the values between both operands.\n" |
| 51025 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 51026 | "_mm_xor_ps(__m128 __a, __m128 __b)\n" |
| 51027 | "{\n" |
| 51028 | " return (__m128)((__v4su)__a ^ (__v4su)__b);\n" |
| 51029 | "}\n" |
| 51030 | "\n" |
| 51031 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
| 51032 | "/// operands for equality and returns the result of the comparison in the\n" |
| 51033 | "/// low-order bits of a vector [4 x float].\n" |
| 51034 | "///\n" |
| 51035 | "/// \\headerfile <x86intrin.h>\n" |
| 51036 | "///\n" |
| 51037 | "/// This intrinsic corresponds to the <c> VCMPEQSS / CMPEQSS </c> instructions.\n" |
| 51038 | "///\n" |
| 51039 | "/// \\param __a\n" |
| 51040 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
| 51041 | "/// 32 bits of this operand are used in the comparison.\n" |
| 51042 | "/// \\param __b\n" |
| 51043 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
| 51044 | "/// 32 bits of this operand are used in the comparison.\n" |
| 51045 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n" |
| 51046 | "/// in the low-order bits.\n" |
| 51047 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 51048 | "_mm_cmpeq_ss(__m128 __a, __m128 __b)\n" |
| 51049 | "{\n" |
| 51050 | " return (__m128)__builtin_ia32_cmpeqss((__v4sf)__a, (__v4sf)__b);\n" |
| 51051 | "}\n" |
| 51052 | "\n" |
| 51053 | "/// Compares each of the corresponding 32-bit float values of the\n" |
| 51054 | "/// 128-bit vectors of [4 x float] for equality.\n" |
| 51055 | "///\n" |
| 51056 | "/// \\headerfile <x86intrin.h>\n" |
| 51057 | "///\n" |
| 51058 | "/// This intrinsic corresponds to the <c> VCMPEQPS / CMPEQPS </c> instructions.\n" |
| 51059 | "///\n" |
| 51060 | "/// \\param __a\n" |
| 51061 | "/// A 128-bit vector of [4 x float].\n" |
| 51062 | "/// \\param __b\n" |
| 51063 | "/// A 128-bit vector of [4 x float].\n" |
| 51064 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n" |
| 51065 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 51066 | "_mm_cmpeq_ps(__m128 __a, __m128 __b)\n" |
| 51067 | "{\n" |
| 51068 | " return (__m128)__builtin_ia32_cmpeqps((__v4sf)__a, (__v4sf)__b);\n" |
| 51069 | "}\n" |
| 51070 | "\n" |
| 51071 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
| 51072 | "/// operands to determine if the value in the first operand is less than the\n" |
| 51073 | "/// corresponding value in the second operand and returns the result of the\n" |
| 51074 | "/// comparison in the low-order bits of a vector of [4 x float].\n" |
| 51075 | "///\n" |
| 51076 | "/// \\headerfile <x86intrin.h>\n" |
| 51077 | "///\n" |
| 51078 | "/// This intrinsic corresponds to the <c> VCMPLTSS / CMPLTSS </c> instructions.\n" |
| 51079 | "///\n" |
| 51080 | "/// \\param __a\n" |
| 51081 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
| 51082 | "/// 32 bits of this operand are used in the comparison.\n" |
| 51083 | "/// \\param __b\n" |
| 51084 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
| 51085 | "/// 32 bits of this operand are used in the comparison.\n" |
| 51086 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n" |
| 51087 | "/// in the low-order bits.\n" |
| 51088 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 51089 | "_mm_cmplt_ss(__m128 __a, __m128 __b)\n" |
| 51090 | "{\n" |
| 51091 | " return (__m128)__builtin_ia32_cmpltss((__v4sf)__a, (__v4sf)__b);\n" |
| 51092 | "}\n" |
| 51093 | "\n" |
| 51094 | "/// Compares each of the corresponding 32-bit float values of the\n" |
| 51095 | "/// 128-bit vectors of [4 x float] to determine if the values in the first\n" |
| 51096 | "/// operand are less than those in the second operand.\n" |
| 51097 | "///\n" |
| 51098 | "/// \\headerfile <x86intrin.h>\n" |
| 51099 | "///\n" |
| 51100 | "/// This intrinsic corresponds to the <c> VCMPLTPS / CMPLTPS </c> instructions.\n" |
| 51101 | "///\n" |
| 51102 | "/// \\param __a\n" |
| 51103 | "/// A 128-bit vector of [4 x float].\n" |
| 51104 | "/// \\param __b\n" |
| 51105 | "/// A 128-bit vector of [4 x float].\n" |
| 51106 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n" |
| 51107 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 51108 | "_mm_cmplt_ps(__m128 __a, __m128 __b)\n" |
| 51109 | "{\n" |
| 51110 | " return (__m128)__builtin_ia32_cmpltps((__v4sf)__a, (__v4sf)__b);\n" |
| 51111 | "}\n" |
| 51112 | "\n" |
| 51113 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
| 51114 | "/// operands to determine if the value in the first operand is less than or\n" |
| 51115 | "/// equal to the corresponding value in the second operand and returns the\n" |
| 51116 | "/// result of the comparison in the low-order bits of a vector of\n" |
| 51117 | "/// [4 x float].\n" |
| 51118 | "///\n" |
| 51119 | "/// \\headerfile <x86intrin.h>\n" |
| 51120 | "///\n" |
| 51121 | "/// This intrinsic corresponds to the <c> VCMPLESS / CMPLESS </c> instructions.\n" |
| 51122 | "///\n" |
| 51123 | "/// \\param __a\n" |
| 51124 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
| 51125 | "/// 32 bits of this operand are used in the comparison.\n" |
| 51126 | "/// \\param __b\n" |
| 51127 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
| 51128 | "/// 32 bits of this operand are used in the comparison.\n" |
| 51129 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n" |
| 51130 | "/// in the low-order bits.\n" |
| 51131 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 51132 | "_mm_cmple_ss(__m128 __a, __m128 __b)\n" |
| 51133 | "{\n" |
| 51134 | " return (__m128)__builtin_ia32_cmpless((__v4sf)__a, (__v4sf)__b);\n" |
| 51135 | "}\n" |
| 51136 | "\n" |
| 51137 | "/// Compares each of the corresponding 32-bit float values of the\n" |
| 51138 | "/// 128-bit vectors of [4 x float] to determine if the values in the first\n" |
| 51139 | "/// operand are less than or equal to those in the second operand.\n" |
| 51140 | "///\n" |
| 51141 | "/// \\headerfile <x86intrin.h>\n" |
| 51142 | "///\n" |
| 51143 | "/// This intrinsic corresponds to the <c> VCMPLEPS / CMPLEPS </c> instructions.\n" |
| 51144 | "///\n" |
| 51145 | "/// \\param __a\n" |
| 51146 | "/// A 128-bit vector of [4 x float].\n" |
| 51147 | "/// \\param __b\n" |
| 51148 | "/// A 128-bit vector of [4 x float].\n" |
| 51149 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n" |
| 51150 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 51151 | "_mm_cmple_ps(__m128 __a, __m128 __b)\n" |
| 51152 | "{\n" |
| 51153 | " return (__m128)__builtin_ia32_cmpleps((__v4sf)__a, (__v4sf)__b);\n" |
| 51154 | "}\n" |
| 51155 | "\n" |
| 51156 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
| 51157 | "/// operands to determine if the value in the first operand is greater than\n" |
| 51158 | "/// the corresponding value in the second operand and returns the result of\n" |
| 51159 | "/// the comparison in the low-order bits of a vector of [4 x float].\n" |
| 51160 | "///\n" |
| 51161 | "/// \\headerfile <x86intrin.h>\n" |
| 51162 | "///\n" |
| 51163 | "/// This intrinsic corresponds to the <c> VCMPLTSS / CMPLTSS </c> instructions.\n" |
| 51164 | "///\n" |
| 51165 | "/// \\param __a\n" |
| 51166 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
| 51167 | "/// 32 bits of this operand are used in the comparison.\n" |
| 51168 | "/// \\param __b\n" |
| 51169 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
| 51170 | "/// 32 bits of this operand are used in the comparison.\n" |
| 51171 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n" |
| 51172 | "/// in the low-order bits.\n" |
| 51173 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 51174 | "_mm_cmpgt_ss(__m128 __a, __m128 __b)\n" |
| 51175 | "{\n" |
| 51176 | " return (__m128)__builtin_shufflevector((__v4sf)__a,\n" |
| 51177 | " (__v4sf)__builtin_ia32_cmpltss((__v4sf)__b, (__v4sf)__a),\n" |
| 51178 | " 4, 1, 2, 3);\n" |
| 51179 | "}\n" |
| 51180 | "\n" |
| 51181 | "/// Compares each of the corresponding 32-bit float values of the\n" |
| 51182 | "/// 128-bit vectors of [4 x float] to determine if the values in the first\n" |
| 51183 | "/// operand are greater than those in the second operand.\n" |
| 51184 | "///\n" |
| 51185 | "/// \\headerfile <x86intrin.h>\n" |
| 51186 | "///\n" |
| 51187 | "/// This intrinsic corresponds to the <c> VCMPLTPS / CMPLTPS </c> instructions.\n" |
| 51188 | "///\n" |
| 51189 | "/// \\param __a\n" |
| 51190 | "/// A 128-bit vector of [4 x float].\n" |
| 51191 | "/// \\param __b\n" |
| 51192 | "/// A 128-bit vector of [4 x float].\n" |
| 51193 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n" |
| 51194 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 51195 | "_mm_cmpgt_ps(__m128 __a, __m128 __b)\n" |
| 51196 | "{\n" |
| 51197 | " return (__m128)__builtin_ia32_cmpltps((__v4sf)__b, (__v4sf)__a);\n" |
| 51198 | "}\n" |
| 51199 | "\n" |
| 51200 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
| 51201 | "/// operands to determine if the value in the first operand is greater than\n" |
| 51202 | "/// or equal to the corresponding value in the second operand and returns\n" |
| 51203 | "/// the result of the comparison in the low-order bits of a vector of\n" |
| 51204 | "/// [4 x float].\n" |
| 51205 | "///\n" |
| 51206 | "/// \\headerfile <x86intrin.h>\n" |
| 51207 | "///\n" |
| 51208 | "/// This intrinsic corresponds to the <c> VCMPLESS / CMPLESS </c> instructions.\n" |
| 51209 | "///\n" |
| 51210 | "/// \\param __a\n" |
| 51211 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
| 51212 | "/// 32 bits of this operand are used in the comparison.\n" |
| 51213 | "/// \\param __b\n" |
| 51214 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
| 51215 | "/// 32 bits of this operand are used in the comparison.\n" |
| 51216 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n" |
| 51217 | "/// in the low-order bits.\n" |
| 51218 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 51219 | "_mm_cmpge_ss(__m128 __a, __m128 __b)\n" |
| 51220 | "{\n" |
| 51221 | " return (__m128)__builtin_shufflevector((__v4sf)__a,\n" |
| 51222 | " (__v4sf)__builtin_ia32_cmpless((__v4sf)__b, (__v4sf)__a),\n" |
| 51223 | " 4, 1, 2, 3);\n" |
| 51224 | "}\n" |
| 51225 | "\n" |
| 51226 | "/// Compares each of the corresponding 32-bit float values of the\n" |
| 51227 | "/// 128-bit vectors of [4 x float] to determine if the values in the first\n" |
| 51228 | "/// operand are greater than or equal to those in the second operand.\n" |
| 51229 | "///\n" |
| 51230 | "/// \\headerfile <x86intrin.h>\n" |
| 51231 | "///\n" |
| 51232 | "/// This intrinsic corresponds to the <c> VCMPLEPS / CMPLEPS </c> instructions.\n" |
| 51233 | "///\n" |
| 51234 | "/// \\param __a\n" |
| 51235 | "/// A 128-bit vector of [4 x float].\n" |
| 51236 | "/// \\param __b\n" |
| 51237 | "/// A 128-bit vector of [4 x float].\n" |
| 51238 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n" |
| 51239 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 51240 | "_mm_cmpge_ps(__m128 __a, __m128 __b)\n" |
| 51241 | "{\n" |
| 51242 | " return (__m128)__builtin_ia32_cmpleps((__v4sf)__b, (__v4sf)__a);\n" |
| 51243 | "}\n" |
| 51244 | "\n" |
| 51245 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
| 51246 | "/// operands for inequality and returns the result of the comparison in the\n" |
| 51247 | "/// low-order bits of a vector of [4 x float].\n" |
| 51248 | "///\n" |
| 51249 | "/// \\headerfile <x86intrin.h>\n" |
| 51250 | "///\n" |
| 51251 | "/// This intrinsic corresponds to the <c> VCMPNEQSS / CMPNEQSS </c>\n" |
| 51252 | "/// instructions.\n" |
| 51253 | "///\n" |
| 51254 | "/// \\param __a\n" |
| 51255 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
| 51256 | "/// 32 bits of this operand are used in the comparison.\n" |
| 51257 | "/// \\param __b\n" |
| 51258 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
| 51259 | "/// 32 bits of this operand are used in the comparison.\n" |
| 51260 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n" |
| 51261 | "/// in the low-order bits.\n" |
| 51262 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 51263 | "_mm_cmpneq_ss(__m128 __a, __m128 __b)\n" |
| 51264 | "{\n" |
| 51265 | " return (__m128)__builtin_ia32_cmpneqss((__v4sf)__a, (__v4sf)__b);\n" |
| 51266 | "}\n" |
| 51267 | "\n" |
| 51268 | "/// Compares each of the corresponding 32-bit float values of the\n" |
| 51269 | "/// 128-bit vectors of [4 x float] for inequality.\n" |
| 51270 | "///\n" |
| 51271 | "/// \\headerfile <x86intrin.h>\n" |
| 51272 | "///\n" |
| 51273 | "/// This intrinsic corresponds to the <c> VCMPNEQPS / CMPNEQPS </c>\n" |
| 51274 | "/// instructions.\n" |
| 51275 | "///\n" |
| 51276 | "/// \\param __a\n" |
| 51277 | "/// A 128-bit vector of [4 x float].\n" |
| 51278 | "/// \\param __b\n" |
| 51279 | "/// A 128-bit vector of [4 x float].\n" |
| 51280 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n" |
| 51281 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 51282 | "_mm_cmpneq_ps(__m128 __a, __m128 __b)\n" |
| 51283 | "{\n" |
| 51284 | " return (__m128)__builtin_ia32_cmpneqps((__v4sf)__a, (__v4sf)__b);\n" |
| 51285 | "}\n" |
| 51286 | "\n" |
| 51287 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
| 51288 | "/// operands to determine if the value in the first operand is not less than\n" |
| 51289 | "/// the corresponding value in the second operand and returns the result of\n" |
| 51290 | "/// the comparison in the low-order bits of a vector of [4 x float].\n" |
| 51291 | "///\n" |
| 51292 | "/// \\headerfile <x86intrin.h>\n" |
| 51293 | "///\n" |
| 51294 | "/// This intrinsic corresponds to the <c> VCMPNLTSS / CMPNLTSS </c>\n" |
| 51295 | "/// instructions.\n" |
| 51296 | "///\n" |
| 51297 | "/// \\param __a\n" |
| 51298 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
| 51299 | "/// 32 bits of this operand are used in the comparison.\n" |
| 51300 | "/// \\param __b\n" |
| 51301 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
| 51302 | "/// 32 bits of this operand are used in the comparison.\n" |
| 51303 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n" |
| 51304 | "/// in the low-order bits.\n" |
| 51305 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 51306 | "_mm_cmpnlt_ss(__m128 __a, __m128 __b)\n" |
| 51307 | "{\n" |
| 51308 | " return (__m128)__builtin_ia32_cmpnltss((__v4sf)__a, (__v4sf)__b);\n" |
| 51309 | "}\n" |
| 51310 | "\n" |
| 51311 | "/// Compares each of the corresponding 32-bit float values of the\n" |
| 51312 | "/// 128-bit vectors of [4 x float] to determine if the values in the first\n" |
| 51313 | "/// operand are not less than those in the second operand.\n" |
| 51314 | "///\n" |
| 51315 | "/// \\headerfile <x86intrin.h>\n" |
| 51316 | "///\n" |
| 51317 | "/// This intrinsic corresponds to the <c> VCMPNLTPS / CMPNLTPS </c>\n" |
| 51318 | "/// instructions.\n" |
| 51319 | "///\n" |
| 51320 | "/// \\param __a\n" |
| 51321 | "/// A 128-bit vector of [4 x float].\n" |
| 51322 | "/// \\param __b\n" |
| 51323 | "/// A 128-bit vector of [4 x float].\n" |
| 51324 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n" |
| 51325 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 51326 | "_mm_cmpnlt_ps(__m128 __a, __m128 __b)\n" |
| 51327 | "{\n" |
| 51328 | " return (__m128)__builtin_ia32_cmpnltps((__v4sf)__a, (__v4sf)__b);\n" |
| 51329 | "}\n" |
| 51330 | "\n" |
| 51331 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
| 51332 | "/// operands to determine if the value in the first operand is not less than\n" |
| 51333 | "/// or equal to the corresponding value in the second operand and returns\n" |
| 51334 | "/// the result of the comparison in the low-order bits of a vector of\n" |
| 51335 | "/// [4 x float].\n" |
| 51336 | "///\n" |
| 51337 | "/// \\headerfile <x86intrin.h>\n" |
| 51338 | "///\n" |
| 51339 | "/// This intrinsic corresponds to the <c> VCMPNLESS / CMPNLESS </c>\n" |
| 51340 | "/// instructions.\n" |
| 51341 | "///\n" |
| 51342 | "/// \\param __a\n" |
| 51343 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
| 51344 | "/// 32 bits of this operand are used in the comparison.\n" |
| 51345 | "/// \\param __b\n" |
| 51346 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
| 51347 | "/// 32 bits of this operand are used in the comparison.\n" |
| 51348 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n" |
| 51349 | "/// in the low-order bits.\n" |
| 51350 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 51351 | "_mm_cmpnle_ss(__m128 __a, __m128 __b)\n" |
| 51352 | "{\n" |
| 51353 | " return (__m128)__builtin_ia32_cmpnless((__v4sf)__a, (__v4sf)__b);\n" |
| 51354 | "}\n" |
| 51355 | "\n" |
| 51356 | "/// Compares each of the corresponding 32-bit float values of the\n" |
| 51357 | "/// 128-bit vectors of [4 x float] to determine if the values in the first\n" |
| 51358 | "/// operand are not less than or equal to those in the second operand.\n" |
| 51359 | "///\n" |
| 51360 | "/// \\headerfile <x86intrin.h>\n" |
| 51361 | "///\n" |
| 51362 | "/// This intrinsic corresponds to the <c> VCMPNLEPS / CMPNLEPS </c>\n" |
| 51363 | "/// instructions.\n" |
| 51364 | "///\n" |
| 51365 | "/// \\param __a\n" |
| 51366 | "/// A 128-bit vector of [4 x float].\n" |
| 51367 | "/// \\param __b\n" |
| 51368 | "/// A 128-bit vector of [4 x float].\n" |
| 51369 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n" |
| 51370 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 51371 | "_mm_cmpnle_ps(__m128 __a, __m128 __b)\n" |
| 51372 | "{\n" |
| 51373 | " return (__m128)__builtin_ia32_cmpnleps((__v4sf)__a, (__v4sf)__b);\n" |
| 51374 | "}\n" |
| 51375 | "\n" |
| 51376 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
| 51377 | "/// operands to determine if the value in the first operand is not greater\n" |
| 51378 | "/// than the corresponding value in the second operand and returns the\n" |
| 51379 | "/// result of the comparison in the low-order bits of a vector of\n" |
| 51380 | "/// [4 x float].\n" |
| 51381 | "///\n" |
| 51382 | "/// \\headerfile <x86intrin.h>\n" |
| 51383 | "///\n" |
| 51384 | "/// This intrinsic corresponds to the <c> VCMPNLTSS / CMPNLTSS </c>\n" |
| 51385 | "/// instructions.\n" |
| 51386 | "///\n" |
| 51387 | "/// \\param __a\n" |
| 51388 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
| 51389 | "/// 32 bits of this operand are used in the comparison.\n" |
| 51390 | "/// \\param __b\n" |
| 51391 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
| 51392 | "/// 32 bits of this operand are used in the comparison.\n" |
| 51393 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n" |
| 51394 | "/// in the low-order bits.\n" |
| 51395 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 51396 | "_mm_cmpngt_ss(__m128 __a, __m128 __b)\n" |
| 51397 | "{\n" |
| 51398 | " return (__m128)__builtin_shufflevector((__v4sf)__a,\n" |
| 51399 | " (__v4sf)__builtin_ia32_cmpnltss((__v4sf)__b, (__v4sf)__a),\n" |
| 51400 | " 4, 1, 2, 3);\n" |
| 51401 | "}\n" |
| 51402 | "\n" |
| 51403 | "/// Compares each of the corresponding 32-bit float values of the\n" |
| 51404 | "/// 128-bit vectors of [4 x float] to determine if the values in the first\n" |
| 51405 | "/// operand are not greater than those in the second operand.\n" |
| 51406 | "///\n" |
| 51407 | "/// \\headerfile <x86intrin.h>\n" |
| 51408 | "///\n" |
| 51409 | "/// This intrinsic corresponds to the <c> VCMPNLTPS / CMPNLTPS </c>\n" |
| 51410 | "/// instructions.\n" |
| 51411 | "///\n" |
| 51412 | "/// \\param __a\n" |
| 51413 | "/// A 128-bit vector of [4 x float].\n" |
| 51414 | "/// \\param __b\n" |
| 51415 | "/// A 128-bit vector of [4 x float].\n" |
| 51416 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n" |
| 51417 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 51418 | "_mm_cmpngt_ps(__m128 __a, __m128 __b)\n" |
| 51419 | "{\n" |
| 51420 | " return (__m128)__builtin_ia32_cmpnltps((__v4sf)__b, (__v4sf)__a);\n" |
| 51421 | "}\n" |
| 51422 | "\n" |
| 51423 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
| 51424 | "/// operands to determine if the value in the first operand is not greater\n" |
| 51425 | "/// than or equal to the corresponding value in the second operand and\n" |
| 51426 | "/// returns the result of the comparison in the low-order bits of a vector\n" |
| 51427 | "/// of [4 x float].\n" |
| 51428 | "///\n" |
| 51429 | "/// \\headerfile <x86intrin.h>\n" |
| 51430 | "///\n" |
| 51431 | "/// This intrinsic corresponds to the <c> VCMPNLESS / CMPNLESS </c>\n" |
| 51432 | "/// instructions.\n" |
| 51433 | "///\n" |
| 51434 | "/// \\param __a\n" |
| 51435 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
| 51436 | "/// 32 bits of this operand are used in the comparison.\n" |
| 51437 | "/// \\param __b\n" |
| 51438 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
| 51439 | "/// 32 bits of this operand are used in the comparison.\n" |
| 51440 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n" |
| 51441 | "/// in the low-order bits.\n" |
| 51442 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 51443 | "_mm_cmpnge_ss(__m128 __a, __m128 __b)\n" |
| 51444 | "{\n" |
| 51445 | " return (__m128)__builtin_shufflevector((__v4sf)__a,\n" |
| 51446 | " (__v4sf)__builtin_ia32_cmpnless((__v4sf)__b, (__v4sf)__a),\n" |
| 51447 | " 4, 1, 2, 3);\n" |
| 51448 | "}\n" |
| 51449 | "\n" |
| 51450 | "/// Compares each of the corresponding 32-bit float values of the\n" |
| 51451 | "/// 128-bit vectors of [4 x float] to determine if the values in the first\n" |
| 51452 | "/// operand are not greater than or equal to those in the second operand.\n" |
| 51453 | "///\n" |
| 51454 | "/// \\headerfile <x86intrin.h>\n" |
| 51455 | "///\n" |
| 51456 | "/// This intrinsic corresponds to the <c> VCMPNLEPS / CMPNLEPS </c>\n" |
| 51457 | "/// instructions.\n" |
| 51458 | "///\n" |
| 51459 | "/// \\param __a\n" |
| 51460 | "/// A 128-bit vector of [4 x float].\n" |
| 51461 | "/// \\param __b\n" |
| 51462 | "/// A 128-bit vector of [4 x float].\n" |
| 51463 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n" |
| 51464 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 51465 | "_mm_cmpnge_ps(__m128 __a, __m128 __b)\n" |
| 51466 | "{\n" |
| 51467 | " return (__m128)__builtin_ia32_cmpnleps((__v4sf)__b, (__v4sf)__a);\n" |
| 51468 | "}\n" |
| 51469 | "\n" |
| 51470 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
| 51471 | "/// operands to determine if the value in the first operand is ordered with\n" |
| 51472 | "/// respect to the corresponding value in the second operand and returns the\n" |
| 51473 | "/// result of the comparison in the low-order bits of a vector of\n" |
| 51474 | "/// [4 x float].\n" |
| 51475 | "///\n" |
| 51476 | "/// \\headerfile <x86intrin.h>\n" |
| 51477 | "///\n" |
| 51478 | "/// This intrinsic corresponds to the <c> VCMPORDSS / CMPORDSS </c>\n" |
| 51479 | "/// instructions.\n" |
| 51480 | "///\n" |
| 51481 | "/// \\param __a\n" |
| 51482 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
| 51483 | "/// 32 bits of this operand are used in the comparison.\n" |
| 51484 | "/// \\param __b\n" |
| 51485 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
| 51486 | "/// 32 bits of this operand are used in the comparison.\n" |
| 51487 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n" |
| 51488 | "/// in the low-order bits.\n" |
| 51489 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 51490 | "_mm_cmpord_ss(__m128 __a, __m128 __b)\n" |
| 51491 | "{\n" |
| 51492 | " return (__m128)__builtin_ia32_cmpordss((__v4sf)__a, (__v4sf)__b);\n" |
| 51493 | "}\n" |
| 51494 | "\n" |
| 51495 | "/// Compares each of the corresponding 32-bit float values of the\n" |
| 51496 | "/// 128-bit vectors of [4 x float] to determine if the values in the first\n" |
| 51497 | "/// operand are ordered with respect to those in the second operand.\n" |
| 51498 | "///\n" |
| 51499 | "/// \\headerfile <x86intrin.h>\n" |
| 51500 | "///\n" |
| 51501 | "/// This intrinsic corresponds to the <c> VCMPORDPS / CMPORDPS </c>\n" |
| 51502 | "/// instructions.\n" |
| 51503 | "///\n" |
| 51504 | "/// \\param __a\n" |
| 51505 | "/// A 128-bit vector of [4 x float].\n" |
| 51506 | "/// \\param __b\n" |
| 51507 | "/// A 128-bit vector of [4 x float].\n" |
| 51508 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n" |
| 51509 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 51510 | "_mm_cmpord_ps(__m128 __a, __m128 __b)\n" |
| 51511 | "{\n" |
| 51512 | " return (__m128)__builtin_ia32_cmpordps((__v4sf)__a, (__v4sf)__b);\n" |
| 51513 | "}\n" |
| 51514 | "\n" |
| 51515 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
| 51516 | "/// operands to determine if the value in the first operand is unordered\n" |
| 51517 | "/// with respect to the corresponding value in the second operand and\n" |
| 51518 | "/// returns the result of the comparison in the low-order bits of a vector\n" |
| 51519 | "/// of [4 x float].\n" |
| 51520 | "///\n" |
| 51521 | "/// \\headerfile <x86intrin.h>\n" |
| 51522 | "///\n" |
| 51523 | "/// This intrinsic corresponds to the <c> VCMPUNORDSS / CMPUNORDSS </c>\n" |
| 51524 | "/// instructions.\n" |
| 51525 | "///\n" |
| 51526 | "/// \\param __a\n" |
| 51527 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
| 51528 | "/// 32 bits of this operand are used in the comparison.\n" |
| 51529 | "/// \\param __b\n" |
| 51530 | "/// A 128-bit vector of [4 x float] containing one of the operands. The lower\n" |
| 51531 | "/// 32 bits of this operand are used in the comparison.\n" |
| 51532 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results\n" |
| 51533 | "/// in the low-order bits.\n" |
| 51534 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 51535 | "_mm_cmpunord_ss(__m128 __a, __m128 __b)\n" |
| 51536 | "{\n" |
| 51537 | " return (__m128)__builtin_ia32_cmpunordss((__v4sf)__a, (__v4sf)__b);\n" |
| 51538 | "}\n" |
| 51539 | "\n" |
| 51540 | "/// Compares each of the corresponding 32-bit float values of the\n" |
| 51541 | "/// 128-bit vectors of [4 x float] to determine if the values in the first\n" |
| 51542 | "/// operand are unordered with respect to those in the second operand.\n" |
| 51543 | "///\n" |
| 51544 | "/// \\headerfile <x86intrin.h>\n" |
| 51545 | "///\n" |
| 51546 | "/// This intrinsic corresponds to the <c> VCMPUNORDPS / CMPUNORDPS </c>\n" |
| 51547 | "/// instructions.\n" |
| 51548 | "///\n" |
| 51549 | "/// \\param __a\n" |
| 51550 | "/// A 128-bit vector of [4 x float].\n" |
| 51551 | "/// \\param __b\n" |
| 51552 | "/// A 128-bit vector of [4 x float].\n" |
| 51553 | "/// \\returns A 128-bit vector of [4 x float] containing the comparison results.\n" |
| 51554 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 51555 | "_mm_cmpunord_ps(__m128 __a, __m128 __b)\n" |
| 51556 | "{\n" |
| 51557 | " return (__m128)__builtin_ia32_cmpunordps((__v4sf)__a, (__v4sf)__b);\n" |
| 51558 | "}\n" |
| 51559 | "\n" |
| 51560 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
| 51561 | "/// operands for equality and returns the result of the comparison.\n" |
| 51562 | "///\n" |
| 51563 | "/// If either of the two lower 32-bit values is NaN, 0 is returned.\n" |
| 51564 | "///\n" |
| 51565 | "/// \\headerfile <x86intrin.h>\n" |
| 51566 | "///\n" |
| 51567 | "/// This intrinsic corresponds to the <c> VCOMISS / COMISS </c>\n" |
| 51568 | "/// instructions.\n" |
| 51569 | "///\n" |
| 51570 | "/// \\param __a\n" |
| 51571 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 51572 | "/// used in the comparison.\n" |
| 51573 | "/// \\param __b\n" |
| 51574 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 51575 | "/// used in the comparison.\n" |
| 51576 | "/// \\returns An integer containing the comparison results. If either of the\n" |
| 51577 | "/// two lower 32-bit values is NaN, 0 is returned.\n" |
| 51578 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 51579 | "_mm_comieq_ss(__m128 __a, __m128 __b)\n" |
| 51580 | "{\n" |
| 51581 | " return __builtin_ia32_comieq((__v4sf)__a, (__v4sf)__b);\n" |
| 51582 | "}\n" |
| 51583 | "\n" |
| 51584 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
| 51585 | "/// operands to determine if the first operand is less than the second\n" |
| 51586 | "/// operand and returns the result of the comparison.\n" |
| 51587 | "///\n" |
| 51588 | "/// If either of the two lower 32-bit values is NaN, 0 is returned.\n" |
| 51589 | "///\n" |
| 51590 | "/// \\headerfile <x86intrin.h>\n" |
| 51591 | "///\n" |
| 51592 | "/// This intrinsic corresponds to the <c> VCOMISS / COMISS </c>\n" |
| 51593 | "/// instructions.\n" |
| 51594 | "///\n" |
| 51595 | "/// \\param __a\n" |
| 51596 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 51597 | "/// used in the comparison.\n" |
| 51598 | "/// \\param __b\n" |
| 51599 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 51600 | "/// used in the comparison.\n" |
| 51601 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
| 51602 | "/// lower 32-bit values is NaN, 0 is returned.\n" |
| 51603 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 51604 | "_mm_comilt_ss(__m128 __a, __m128 __b)\n" |
| 51605 | "{\n" |
| 51606 | " return __builtin_ia32_comilt((__v4sf)__a, (__v4sf)__b);\n" |
| 51607 | "}\n" |
| 51608 | "\n" |
| 51609 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
| 51610 | "/// operands to determine if the first operand is less than or equal to the\n" |
| 51611 | "/// second operand and returns the result of the comparison.\n" |
| 51612 | "///\n" |
| 51613 | "/// If either of the two lower 32-bit values is NaN, 0 is returned.\n" |
| 51614 | "///\n" |
| 51615 | "/// \\headerfile <x86intrin.h>\n" |
| 51616 | "///\n" |
| 51617 | "/// This intrinsic corresponds to the <c> VCOMISS / COMISS </c> instructions.\n" |
| 51618 | "///\n" |
| 51619 | "/// \\param __a\n" |
| 51620 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 51621 | "/// used in the comparison.\n" |
| 51622 | "/// \\param __b\n" |
| 51623 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 51624 | "/// used in the comparison.\n" |
| 51625 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
| 51626 | "/// lower 32-bit values is NaN, 0 is returned.\n" |
| 51627 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 51628 | "_mm_comile_ss(__m128 __a, __m128 __b)\n" |
| 51629 | "{\n" |
| 51630 | " return __builtin_ia32_comile((__v4sf)__a, (__v4sf)__b);\n" |
| 51631 | "}\n" |
| 51632 | "\n" |
| 51633 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
| 51634 | "/// operands to determine if the first operand is greater than the second\n" |
| 51635 | "/// operand and returns the result of the comparison.\n" |
| 51636 | "///\n" |
| 51637 | "/// If either of the two lower 32-bit values is NaN, 0 is returned.\n" |
| 51638 | "///\n" |
| 51639 | "/// \\headerfile <x86intrin.h>\n" |
| 51640 | "///\n" |
| 51641 | "/// This intrinsic corresponds to the <c> VCOMISS / COMISS </c> instructions.\n" |
| 51642 | "///\n" |
| 51643 | "/// \\param __a\n" |
| 51644 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 51645 | "/// used in the comparison.\n" |
| 51646 | "/// \\param __b\n" |
| 51647 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 51648 | "/// used in the comparison.\n" |
| 51649 | "/// \\returns An integer containing the comparison results. If either of the\n" |
| 51650 | "/// two lower 32-bit values is NaN, 0 is returned.\n" |
| 51651 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 51652 | "_mm_comigt_ss(__m128 __a, __m128 __b)\n" |
| 51653 | "{\n" |
| 51654 | " return __builtin_ia32_comigt((__v4sf)__a, (__v4sf)__b);\n" |
| 51655 | "}\n" |
| 51656 | "\n" |
| 51657 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
| 51658 | "/// operands to determine if the first operand is greater than or equal to\n" |
| 51659 | "/// the second operand and returns the result of the comparison.\n" |
| 51660 | "///\n" |
| 51661 | "/// If either of the two lower 32-bit values is NaN, 0 is returned.\n" |
| 51662 | "///\n" |
| 51663 | "/// \\headerfile <x86intrin.h>\n" |
| 51664 | "///\n" |
| 51665 | "/// This intrinsic corresponds to the <c> VCOMISS / COMISS </c> instructions.\n" |
| 51666 | "///\n" |
| 51667 | "/// \\param __a\n" |
| 51668 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 51669 | "/// used in the comparison.\n" |
| 51670 | "/// \\param __b\n" |
| 51671 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 51672 | "/// used in the comparison.\n" |
| 51673 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
| 51674 | "/// lower 32-bit values is NaN, 0 is returned.\n" |
| 51675 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 51676 | "_mm_comige_ss(__m128 __a, __m128 __b)\n" |
| 51677 | "{\n" |
| 51678 | " return __builtin_ia32_comige((__v4sf)__a, (__v4sf)__b);\n" |
| 51679 | "}\n" |
| 51680 | "\n" |
| 51681 | "/// Compares two 32-bit float values in the low-order bits of both\n" |
| 51682 | "/// operands to determine if the first operand is not equal to the second\n" |
| 51683 | "/// operand and returns the result of the comparison.\n" |
| 51684 | "///\n" |
| 51685 | "/// If either of the two lower 32-bit values is NaN, 1 is returned.\n" |
| 51686 | "///\n" |
| 51687 | "/// \\headerfile <x86intrin.h>\n" |
| 51688 | "///\n" |
| 51689 | "/// This intrinsic corresponds to the <c> VCOMISS / COMISS </c> instructions.\n" |
| 51690 | "///\n" |
| 51691 | "/// \\param __a\n" |
| 51692 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 51693 | "/// used in the comparison.\n" |
| 51694 | "/// \\param __b\n" |
| 51695 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 51696 | "/// used in the comparison.\n" |
| 51697 | "/// \\returns An integer containing the comparison results. If either of the\n" |
| 51698 | "/// two lower 32-bit values is NaN, 1 is returned.\n" |
| 51699 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 51700 | "_mm_comineq_ss(__m128 __a, __m128 __b)\n" |
| 51701 | "{\n" |
| 51702 | " return __builtin_ia32_comineq((__v4sf)__a, (__v4sf)__b);\n" |
| 51703 | "}\n" |
| 51704 | "\n" |
| 51705 | "/// Performs an unordered comparison of two 32-bit float values using\n" |
| 51706 | "/// the low-order bits of both operands to determine equality and returns\n" |
| 51707 | "/// the result of the comparison.\n" |
| 51708 | "///\n" |
| 51709 | "/// If either of the two lower 32-bit values is NaN, 0 is returned.\n" |
| 51710 | "///\n" |
| 51711 | "/// \\headerfile <x86intrin.h>\n" |
| 51712 | "///\n" |
| 51713 | "/// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.\n" |
| 51714 | "///\n" |
| 51715 | "/// \\param __a\n" |
| 51716 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 51717 | "/// used in the comparison.\n" |
| 51718 | "/// \\param __b\n" |
| 51719 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 51720 | "/// used in the comparison.\n" |
| 51721 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
| 51722 | "/// lower 32-bit values is NaN, 0 is returned.\n" |
| 51723 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 51724 | "_mm_ucomieq_ss(__m128 __a, __m128 __b)\n" |
| 51725 | "{\n" |
| 51726 | " return __builtin_ia32_ucomieq((__v4sf)__a, (__v4sf)__b);\n" |
| 51727 | "}\n" |
| 51728 | "\n" |
| 51729 | "/// Performs an unordered comparison of two 32-bit float values using\n" |
| 51730 | "/// the low-order bits of both operands to determine if the first operand is\n" |
| 51731 | "/// less than the second operand and returns the result of the comparison.\n" |
| 51732 | "///\n" |
| 51733 | "/// If either of the two lower 32-bit values is NaN, 0 is returned.\n" |
| 51734 | "///\n" |
| 51735 | "/// \\headerfile <x86intrin.h>\n" |
| 51736 | "///\n" |
| 51737 | "/// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.\n" |
| 51738 | "///\n" |
| 51739 | "/// \\param __a\n" |
| 51740 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 51741 | "/// used in the comparison.\n" |
| 51742 | "/// \\param __b\n" |
| 51743 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 51744 | "/// used in the comparison.\n" |
| 51745 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
| 51746 | "/// lower 32-bit values is NaN, 0 is returned.\n" |
| 51747 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 51748 | "_mm_ucomilt_ss(__m128 __a, __m128 __b)\n" |
| 51749 | "{\n" |
| 51750 | " return __builtin_ia32_ucomilt((__v4sf)__a, (__v4sf)__b);\n" |
| 51751 | "}\n" |
| 51752 | "\n" |
| 51753 | "/// Performs an unordered comparison of two 32-bit float values using\n" |
| 51754 | "/// the low-order bits of both operands to determine if the first operand is\n" |
| 51755 | "/// less than or equal to the second operand and returns the result of the\n" |
| 51756 | "/// comparison.\n" |
| 51757 | "///\n" |
| 51758 | "/// If either of the two lower 32-bit values is NaN, 0 is returned.\n" |
| 51759 | "///\n" |
| 51760 | "/// \\headerfile <x86intrin.h>\n" |
| 51761 | "///\n" |
| 51762 | "/// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.\n" |
| 51763 | "///\n" |
| 51764 | "/// \\param __a\n" |
| 51765 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 51766 | "/// used in the comparison.\n" |
| 51767 | "/// \\param __b\n" |
| 51768 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 51769 | "/// used in the comparison.\n" |
| 51770 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
| 51771 | "/// lower 32-bit values is NaN, 0 is returned.\n" |
| 51772 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 51773 | "_mm_ucomile_ss(__m128 __a, __m128 __b)\n" |
| 51774 | "{\n" |
| 51775 | " return __builtin_ia32_ucomile((__v4sf)__a, (__v4sf)__b);\n" |
| 51776 | "}\n" |
| 51777 | "\n" |
| 51778 | "/// Performs an unordered comparison of two 32-bit float values using\n" |
| 51779 | "/// the low-order bits of both operands to determine if the first operand is\n" |
| 51780 | "/// greater than the second operand and returns the result of the\n" |
| 51781 | "/// comparison.\n" |
| 51782 | "///\n" |
| 51783 | "/// If either of the two lower 32-bit values is NaN, 0 is returned.\n" |
| 51784 | "///\n" |
| 51785 | "/// \\headerfile <x86intrin.h>\n" |
| 51786 | "///\n" |
| 51787 | "/// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.\n" |
| 51788 | "///\n" |
| 51789 | "/// \\param __a\n" |
| 51790 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 51791 | "/// used in the comparison.\n" |
| 51792 | "/// \\param __b\n" |
| 51793 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 51794 | "/// used in the comparison.\n" |
| 51795 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
| 51796 | "/// lower 32-bit values is NaN, 0 is returned.\n" |
| 51797 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 51798 | "_mm_ucomigt_ss(__m128 __a, __m128 __b)\n" |
| 51799 | "{\n" |
| 51800 | " return __builtin_ia32_ucomigt((__v4sf)__a, (__v4sf)__b);\n" |
| 51801 | "}\n" |
| 51802 | "\n" |
| 51803 | "/// Performs an unordered comparison of two 32-bit float values using\n" |
| 51804 | "/// the low-order bits of both operands to determine if the first operand is\n" |
| 51805 | "/// greater than or equal to the second operand and returns the result of\n" |
| 51806 | "/// the comparison.\n" |
| 51807 | "///\n" |
| 51808 | "/// If either of the two lower 32-bit values is NaN, 0 is returned.\n" |
| 51809 | "///\n" |
| 51810 | "/// \\headerfile <x86intrin.h>\n" |
| 51811 | "///\n" |
| 51812 | "/// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.\n" |
| 51813 | "///\n" |
| 51814 | "/// \\param __a\n" |
| 51815 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 51816 | "/// used in the comparison.\n" |
| 51817 | "/// \\param __b\n" |
| 51818 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 51819 | "/// used in the comparison.\n" |
| 51820 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
| 51821 | "/// lower 32-bit values is NaN, 0 is returned.\n" |
| 51822 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 51823 | "_mm_ucomige_ss(__m128 __a, __m128 __b)\n" |
| 51824 | "{\n" |
| 51825 | " return __builtin_ia32_ucomige((__v4sf)__a, (__v4sf)__b);\n" |
| 51826 | "}\n" |
| 51827 | "\n" |
| 51828 | "/// Performs an unordered comparison of two 32-bit float values using\n" |
| 51829 | "/// the low-order bits of both operands to determine inequality and returns\n" |
| 51830 | "/// the result of the comparison.\n" |
| 51831 | "///\n" |
| 51832 | "/// If either of the two lower 32-bit values is NaN, 1 is returned.\n" |
| 51833 | "///\n" |
| 51834 | "/// \\headerfile <x86intrin.h>\n" |
| 51835 | "///\n" |
| 51836 | "/// This intrinsic corresponds to the <c> VUCOMISS / UCOMISS </c> instructions.\n" |
| 51837 | "///\n" |
| 51838 | "/// \\param __a\n" |
| 51839 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 51840 | "/// used in the comparison.\n" |
| 51841 | "/// \\param __b\n" |
| 51842 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 51843 | "/// used in the comparison.\n" |
| 51844 | "/// \\returns An integer containing the comparison results. If either of the two\n" |
| 51845 | "/// lower 32-bit values is NaN, 1 is returned.\n" |
| 51846 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 51847 | "_mm_ucomineq_ss(__m128 __a, __m128 __b)\n" |
| 51848 | "{\n" |
| 51849 | " return __builtin_ia32_ucomineq((__v4sf)__a, (__v4sf)__b);\n" |
| 51850 | "}\n" |
| 51851 | "\n" |
| 51852 | "/// Converts a float value contained in the lower 32 bits of a vector of\n" |
| 51853 | "/// [4 x float] into a 32-bit integer.\n" |
| 51854 | "///\n" |
| 51855 | "/// \\headerfile <x86intrin.h>\n" |
| 51856 | "///\n" |
| 51857 | "/// This intrinsic corresponds to the <c> VCVTSS2SI / CVTSS2SI </c>\n" |
| 51858 | "/// instructions.\n" |
| 51859 | "///\n" |
| 51860 | "/// \\param __a\n" |
| 51861 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 51862 | "/// used in the conversion.\n" |
| 51863 | "/// \\returns A 32-bit integer containing the converted value.\n" |
| 51864 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 51865 | "_mm_cvtss_si32(__m128 __a)\n" |
| 51866 | "{\n" |
| 51867 | " return __builtin_ia32_cvtss2si((__v4sf)__a);\n" |
| 51868 | "}\n" |
| 51869 | "\n" |
| 51870 | "/// Converts a float value contained in the lower 32 bits of a vector of\n" |
| 51871 | "/// [4 x float] into a 32-bit integer.\n" |
| 51872 | "///\n" |
| 51873 | "/// \\headerfile <x86intrin.h>\n" |
| 51874 | "///\n" |
| 51875 | "/// This intrinsic corresponds to the <c> VCVTSS2SI / CVTSS2SI </c>\n" |
| 51876 | "/// instructions.\n" |
| 51877 | "///\n" |
| 51878 | "/// \\param __a\n" |
| 51879 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 51880 | "/// used in the conversion.\n" |
| 51881 | "/// \\returns A 32-bit integer containing the converted value.\n" |
| 51882 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 51883 | "_mm_cvt_ss2si(__m128 __a)\n" |
| 51884 | "{\n" |
| 51885 | " return _mm_cvtss_si32(__a);\n" |
| 51886 | "}\n" |
| 51887 | "\n" |
| 51888 | "#ifdef __x86_64__\n" |
| 51889 | "\n" |
| 51890 | "/// Converts a float value contained in the lower 32 bits of a vector of\n" |
| 51891 | "/// [4 x float] into a 64-bit integer.\n" |
| 51892 | "///\n" |
| 51893 | "/// \\headerfile <x86intrin.h>\n" |
| 51894 | "///\n" |
| 51895 | "/// This intrinsic corresponds to the <c> VCVTSS2SI / CVTSS2SI </c>\n" |
| 51896 | "/// instructions.\n" |
| 51897 | "///\n" |
| 51898 | "/// \\param __a\n" |
| 51899 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 51900 | "/// used in the conversion.\n" |
| 51901 | "/// \\returns A 64-bit integer containing the converted value.\n" |
| 51902 | "static __inline__ long long __DEFAULT_FN_ATTRS\n" |
| 51903 | "_mm_cvtss_si64(__m128 __a)\n" |
| 51904 | "{\n" |
| 51905 | " return __builtin_ia32_cvtss2si64((__v4sf)__a);\n" |
| 51906 | "}\n" |
| 51907 | "\n" |
| 51908 | "#endif\n" |
| 51909 | "\n" |
| 51910 | "/// Converts two low-order float values in a 128-bit vector of\n" |
| 51911 | "/// [4 x float] into a 64-bit vector of [2 x i32].\n" |
| 51912 | "///\n" |
| 51913 | "/// \\headerfile <x86intrin.h>\n" |
| 51914 | "///\n" |
| 51915 | "/// This intrinsic corresponds to the <c> CVTPS2PI </c> instruction.\n" |
| 51916 | "///\n" |
| 51917 | "/// \\param __a\n" |
| 51918 | "/// A 128-bit vector of [4 x float].\n" |
| 51919 | "/// \\returns A 64-bit integer vector containing the converted values.\n" |
| 51920 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 51921 | "_mm_cvtps_pi32(__m128 __a)\n" |
| 51922 | "{\n" |
| 51923 | " return (__m64)__builtin_ia32_cvtps2pi((__v4sf)__a);\n" |
| 51924 | "}\n" |
| 51925 | "\n" |
| 51926 | "/// Converts two low-order float values in a 128-bit vector of\n" |
| 51927 | "/// [4 x float] into a 64-bit vector of [2 x i32].\n" |
| 51928 | "///\n" |
| 51929 | "/// \\headerfile <x86intrin.h>\n" |
| 51930 | "///\n" |
| 51931 | "/// This intrinsic corresponds to the <c> CVTPS2PI </c> instruction.\n" |
| 51932 | "///\n" |
| 51933 | "/// \\param __a\n" |
| 51934 | "/// A 128-bit vector of [4 x float].\n" |
| 51935 | "/// \\returns A 64-bit integer vector containing the converted values.\n" |
| 51936 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 51937 | "_mm_cvt_ps2pi(__m128 __a)\n" |
| 51938 | "{\n" |
| 51939 | " return _mm_cvtps_pi32(__a);\n" |
| 51940 | "}\n" |
| 51941 | "\n" |
| 51942 | "/// Converts a float value contained in the lower 32 bits of a vector of\n" |
| 51943 | "/// [4 x float] into a 32-bit integer, truncating the result when it is\n" |
| 51944 | "/// inexact.\n" |
| 51945 | "///\n" |
| 51946 | "/// \\headerfile <x86intrin.h>\n" |
| 51947 | "///\n" |
| 51948 | "/// This intrinsic corresponds to the <c> VCVTTSS2SI / CVTTSS2SI </c>\n" |
| 51949 | "/// instructions.\n" |
| 51950 | "///\n" |
| 51951 | "/// \\param __a\n" |
| 51952 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 51953 | "/// used in the conversion.\n" |
| 51954 | "/// \\returns A 32-bit integer containing the converted value.\n" |
| 51955 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 51956 | "_mm_cvttss_si32(__m128 __a)\n" |
| 51957 | "{\n" |
| 51958 | " return __builtin_ia32_cvttss2si((__v4sf)__a);\n" |
| 51959 | "}\n" |
| 51960 | "\n" |
| 51961 | "/// Converts a float value contained in the lower 32 bits of a vector of\n" |
| 51962 | "/// [4 x float] into a 32-bit integer, truncating the result when it is\n" |
| 51963 | "/// inexact.\n" |
| 51964 | "///\n" |
| 51965 | "/// \\headerfile <x86intrin.h>\n" |
| 51966 | "///\n" |
| 51967 | "/// This intrinsic corresponds to the <c> VCVTTSS2SI / CVTTSS2SI </c>\n" |
| 51968 | "/// instructions.\n" |
| 51969 | "///\n" |
| 51970 | "/// \\param __a\n" |
| 51971 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 51972 | "/// used in the conversion.\n" |
| 51973 | "/// \\returns A 32-bit integer containing the converted value.\n" |
| 51974 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 51975 | "_mm_cvtt_ss2si(__m128 __a)\n" |
| 51976 | "{\n" |
| 51977 | " return _mm_cvttss_si32(__a);\n" |
| 51978 | "}\n" |
| 51979 | "\n" |
| 51980 | "#ifdef __x86_64__\n" |
| 51981 | "/// Converts a float value contained in the lower 32 bits of a vector of\n" |
| 51982 | "/// [4 x float] into a 64-bit integer, truncating the result when it is\n" |
| 51983 | "/// inexact.\n" |
| 51984 | "///\n" |
| 51985 | "/// \\headerfile <x86intrin.h>\n" |
| 51986 | "///\n" |
| 51987 | "/// This intrinsic corresponds to the <c> VCVTTSS2SI / CVTTSS2SI </c>\n" |
| 51988 | "/// instructions.\n" |
| 51989 | "///\n" |
| 51990 | "/// \\param __a\n" |
| 51991 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 51992 | "/// used in the conversion.\n" |
| 51993 | "/// \\returns A 64-bit integer containing the converted value.\n" |
| 51994 | "static __inline__ long long __DEFAULT_FN_ATTRS\n" |
| 51995 | "_mm_cvttss_si64(__m128 __a)\n" |
| 51996 | "{\n" |
| 51997 | " return __builtin_ia32_cvttss2si64((__v4sf)__a);\n" |
| 51998 | "}\n" |
| 51999 | "#endif\n" |
| 52000 | "\n" |
| 52001 | "/// Converts two low-order float values in a 128-bit vector of\n" |
| 52002 | "/// [4 x float] into a 64-bit vector of [2 x i32], truncating the result\n" |
| 52003 | "/// when it is inexact.\n" |
| 52004 | "///\n" |
| 52005 | "/// \\headerfile <x86intrin.h>\n" |
| 52006 | "///\n" |
| 52007 | "/// This intrinsic corresponds to the <c> CVTTPS2PI / VTTPS2PI </c>\n" |
| 52008 | "/// instructions.\n" |
| 52009 | "///\n" |
| 52010 | "/// \\param __a\n" |
| 52011 | "/// A 128-bit vector of [4 x float].\n" |
| 52012 | "/// \\returns A 64-bit integer vector containing the converted values.\n" |
| 52013 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 52014 | "_mm_cvttps_pi32(__m128 __a)\n" |
| 52015 | "{\n" |
| 52016 | " return (__m64)__builtin_ia32_cvttps2pi((__v4sf)__a);\n" |
| 52017 | "}\n" |
| 52018 | "\n" |
| 52019 | "/// Converts two low-order float values in a 128-bit vector of [4 x\n" |
| 52020 | "/// float] into a 64-bit vector of [2 x i32], truncating the result when it\n" |
| 52021 | "/// is inexact.\n" |
| 52022 | "///\n" |
| 52023 | "/// \\headerfile <x86intrin.h>\n" |
| 52024 | "///\n" |
| 52025 | "/// This intrinsic corresponds to the <c> CVTTPS2PI </c> instruction.\n" |
| 52026 | "///\n" |
| 52027 | "/// \\param __a\n" |
| 52028 | "/// A 128-bit vector of [4 x float].\n" |
| 52029 | "/// \\returns A 64-bit integer vector containing the converted values.\n" |
| 52030 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 52031 | "_mm_cvtt_ps2pi(__m128 __a)\n" |
| 52032 | "{\n" |
| 52033 | " return _mm_cvttps_pi32(__a);\n" |
| 52034 | "}\n" |
| 52035 | "\n" |
| 52036 | "/// Converts a 32-bit signed integer value into a floating point value\n" |
| 52037 | "/// and writes it to the lower 32 bits of the destination. The remaining\n" |
| 52038 | "/// higher order elements of the destination vector are copied from the\n" |
| 52039 | "/// corresponding elements in the first operand.\n" |
| 52040 | "///\n" |
| 52041 | "/// \\headerfile <x86intrin.h>\n" |
| 52042 | "///\n" |
| 52043 | "/// This intrinsic corresponds to the <c> VCVTSI2SS / CVTSI2SS </c> instruction.\n" |
| 52044 | "///\n" |
| 52045 | "/// \\param __a\n" |
| 52046 | "/// A 128-bit vector of [4 x float].\n" |
| 52047 | "/// \\param __b\n" |
| 52048 | "/// A 32-bit signed integer operand containing the value to be converted.\n" |
| 52049 | "/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the\n" |
| 52050 | "/// converted value of the second operand. The upper 96 bits are copied from\n" |
| 52051 | "/// the upper 96 bits of the first operand.\n" |
| 52052 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 52053 | "_mm_cvtsi32_ss(__m128 __a, int __b)\n" |
| 52054 | "{\n" |
| 52055 | " __a[0] = __b;\n" |
| 52056 | " return __a;\n" |
| 52057 | "}\n" |
| 52058 | "\n" |
| 52059 | "/// Converts a 32-bit signed integer value into a floating point value\n" |
| 52060 | "/// and writes it to the lower 32 bits of the destination. The remaining\n" |
| 52061 | "/// higher order elements of the destination are copied from the\n" |
| 52062 | "/// corresponding elements in the first operand.\n" |
| 52063 | "///\n" |
| 52064 | "/// \\headerfile <x86intrin.h>\n" |
| 52065 | "///\n" |
| 52066 | "/// This intrinsic corresponds to the <c> VCVTSI2SS / CVTSI2SS </c> instruction.\n" |
| 52067 | "///\n" |
| 52068 | "/// \\param __a\n" |
| 52069 | "/// A 128-bit vector of [4 x float].\n" |
| 52070 | "/// \\param __b\n" |
| 52071 | "/// A 32-bit signed integer operand containing the value to be converted.\n" |
| 52072 | "/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the\n" |
| 52073 | "/// converted value of the second operand. The upper 96 bits are copied from\n" |
| 52074 | "/// the upper 96 bits of the first operand.\n" |
| 52075 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 52076 | "_mm_cvt_si2ss(__m128 __a, int __b)\n" |
| 52077 | "{\n" |
| 52078 | " return _mm_cvtsi32_ss(__a, __b);\n" |
| 52079 | "}\n" |
| 52080 | "\n" |
| 52081 | "#ifdef __x86_64__\n" |
| 52082 | "\n" |
| 52083 | "/// Converts a 64-bit signed integer value into a floating point value\n" |
| 52084 | "/// and writes it to the lower 32 bits of the destination. The remaining\n" |
| 52085 | "/// higher order elements of the destination are copied from the\n" |
| 52086 | "/// corresponding elements in the first operand.\n" |
| 52087 | "///\n" |
| 52088 | "/// \\headerfile <x86intrin.h>\n" |
| 52089 | "///\n" |
| 52090 | "/// This intrinsic corresponds to the <c> VCVTSI2SS / CVTSI2SS </c> instruction.\n" |
| 52091 | "///\n" |
| 52092 | "/// \\param __a\n" |
| 52093 | "/// A 128-bit vector of [4 x float].\n" |
| 52094 | "/// \\param __b\n" |
| 52095 | "/// A 64-bit signed integer operand containing the value to be converted.\n" |
| 52096 | "/// \\returns A 128-bit vector of [4 x float] whose lower 32 bits contain the\n" |
| 52097 | "/// converted value of the second operand. The upper 96 bits are copied from\n" |
| 52098 | "/// the upper 96 bits of the first operand.\n" |
| 52099 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 52100 | "_mm_cvtsi64_ss(__m128 __a, long long __b)\n" |
| 52101 | "{\n" |
| 52102 | " __a[0] = __b;\n" |
| 52103 | " return __a;\n" |
| 52104 | "}\n" |
| 52105 | "\n" |
| 52106 | "#endif\n" |
| 52107 | "\n" |
| 52108 | "/// Converts two elements of a 64-bit vector of [2 x i32] into two\n" |
| 52109 | "/// floating point values and writes them to the lower 64-bits of the\n" |
| 52110 | "/// destination. The remaining higher order elements of the destination are\n" |
| 52111 | "/// copied from the corresponding elements in the first operand.\n" |
| 52112 | "///\n" |
| 52113 | "/// \\headerfile <x86intrin.h>\n" |
| 52114 | "///\n" |
| 52115 | "/// This intrinsic corresponds to the <c> CVTPI2PS </c> instruction.\n" |
| 52116 | "///\n" |
| 52117 | "/// \\param __a\n" |
| 52118 | "/// A 128-bit vector of [4 x float].\n" |
| 52119 | "/// \\param __b\n" |
| 52120 | "/// A 64-bit vector of [2 x i32]. The elements in this vector are converted\n" |
| 52121 | "/// and written to the corresponding low-order elements in the destination.\n" |
| 52122 | "/// \\returns A 128-bit vector of [4 x float] whose lower 64 bits contain the\n" |
| 52123 | "/// converted value of the second operand. The upper 64 bits are copied from\n" |
| 52124 | "/// the upper 64 bits of the first operand.\n" |
| 52125 | "static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n" |
| 52126 | "_mm_cvtpi32_ps(__m128 __a, __m64 __b)\n" |
| 52127 | "{\n" |
| 52128 | " return __builtin_ia32_cvtpi2ps((__v4sf)__a, (__v2si)__b);\n" |
| 52129 | "}\n" |
| 52130 | "\n" |
| 52131 | "/// Converts two elements of a 64-bit vector of [2 x i32] into two\n" |
| 52132 | "/// floating point values and writes them to the lower 64-bits of the\n" |
| 52133 | "/// destination. The remaining higher order elements of the destination are\n" |
| 52134 | "/// copied from the corresponding elements in the first operand.\n" |
| 52135 | "///\n" |
| 52136 | "/// \\headerfile <x86intrin.h>\n" |
| 52137 | "///\n" |
| 52138 | "/// This intrinsic corresponds to the <c> CVTPI2PS </c> instruction.\n" |
| 52139 | "///\n" |
| 52140 | "/// \\param __a\n" |
| 52141 | "/// A 128-bit vector of [4 x float].\n" |
| 52142 | "/// \\param __b\n" |
| 52143 | "/// A 64-bit vector of [2 x i32]. The elements in this vector are converted\n" |
| 52144 | "/// and written to the corresponding low-order elements in the destination.\n" |
| 52145 | "/// \\returns A 128-bit vector of [4 x float] whose lower 64 bits contain the\n" |
| 52146 | "/// converted value from the second operand. The upper 64 bits are copied\n" |
| 52147 | "/// from the upper 64 bits of the first operand.\n" |
| 52148 | "static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n" |
| 52149 | "_mm_cvt_pi2ps(__m128 __a, __m64 __b)\n" |
| 52150 | "{\n" |
| 52151 | " return _mm_cvtpi32_ps(__a, __b);\n" |
| 52152 | "}\n" |
| 52153 | "\n" |
| 52154 | "/// Extracts a float value contained in the lower 32 bits of a vector of\n" |
| 52155 | "/// [4 x float].\n" |
| 52156 | "///\n" |
| 52157 | "/// \\headerfile <x86intrin.h>\n" |
| 52158 | "///\n" |
| 52159 | "/// This intrinsic has no corresponding instruction.\n" |
| 52160 | "///\n" |
| 52161 | "/// \\param __a\n" |
| 52162 | "/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are\n" |
| 52163 | "/// used in the extraction.\n" |
| 52164 | "/// \\returns A 32-bit float containing the extracted value.\n" |
| 52165 | "static __inline__ float __DEFAULT_FN_ATTRS\n" |
| 52166 | "_mm_cvtss_f32(__m128 __a)\n" |
| 52167 | "{\n" |
| 52168 | " return __a[0];\n" |
| 52169 | "}\n" |
| 52170 | "\n" |
| 52171 | "/// Loads two packed float values from the address \\a __p into the\n" |
| 52172 | "/// high-order bits of a 128-bit vector of [4 x float]. The low-order bits\n" |
| 52173 | "/// are copied from the low-order bits of the first operand.\n" |
| 52174 | "///\n" |
| 52175 | "/// \\headerfile <x86intrin.h>\n" |
| 52176 | "///\n" |
| 52177 | "/// This intrinsic corresponds to the <c> VMOVHPD / MOVHPD </c> instruction.\n" |
| 52178 | "///\n" |
| 52179 | "/// \\param __a\n" |
| 52180 | "/// A 128-bit vector of [4 x float]. Bits [63:0] are written to bits [63:0]\n" |
| 52181 | "/// of the destination.\n" |
| 52182 | "/// \\param __p\n" |
| 52183 | "/// A pointer to two packed float values. Bits [63:0] are written to bits\n" |
| 52184 | "/// [127:64] of the destination.\n" |
| 52185 | "/// \\returns A 128-bit vector of [4 x float] containing the moved values.\n" |
| 52186 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 52187 | "_mm_loadh_pi(__m128 __a, const __m64 *__p)\n" |
| 52188 | "{\n" |
| 52189 | " typedef float __mm_loadh_pi_v2f32 __attribute__((__vector_size__(8)));\n" |
| 52190 | " struct __mm_loadh_pi_struct {\n" |
| 52191 | " __mm_loadh_pi_v2f32 __u;\n" |
| 52192 | " } __attribute__((__packed__, __may_alias__));\n" |
| 52193 | " __mm_loadh_pi_v2f32 __b = ((struct __mm_loadh_pi_struct*)__p)->__u;\n" |
| 52194 | " __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1);\n" |
| 52195 | " return __builtin_shufflevector(__a, __bb, 0, 1, 4, 5);\n" |
| 52196 | "}\n" |
| 52197 | "\n" |
| 52198 | "/// Loads two packed float values from the address \\a __p into the\n" |
| 52199 | "/// low-order bits of a 128-bit vector of [4 x float]. The high-order bits\n" |
| 52200 | "/// are copied from the high-order bits of the first operand.\n" |
| 52201 | "///\n" |
| 52202 | "/// \\headerfile <x86intrin.h>\n" |
| 52203 | "///\n" |
| 52204 | "/// This intrinsic corresponds to the <c> VMOVLPD / MOVLPD </c> instruction.\n" |
| 52205 | "///\n" |
| 52206 | "/// \\param __a\n" |
| 52207 | "/// A 128-bit vector of [4 x float]. Bits [127:64] are written to bits\n" |
| 52208 | "/// [127:64] of the destination.\n" |
| 52209 | "/// \\param __p\n" |
| 52210 | "/// A pointer to two packed float values. Bits [63:0] are written to bits\n" |
| 52211 | "/// [63:0] of the destination.\n" |
| 52212 | "/// \\returns A 128-bit vector of [4 x float] containing the moved values.\n" |
| 52213 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 52214 | "_mm_loadl_pi(__m128 __a, const __m64 *__p)\n" |
| 52215 | "{\n" |
| 52216 | " typedef float __mm_loadl_pi_v2f32 __attribute__((__vector_size__(8)));\n" |
| 52217 | " struct __mm_loadl_pi_struct {\n" |
| 52218 | " __mm_loadl_pi_v2f32 __u;\n" |
| 52219 | " } __attribute__((__packed__, __may_alias__));\n" |
| 52220 | " __mm_loadl_pi_v2f32 __b = ((struct __mm_loadl_pi_struct*)__p)->__u;\n" |
| 52221 | " __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1);\n" |
| 52222 | " return __builtin_shufflevector(__a, __bb, 4, 5, 2, 3);\n" |
| 52223 | "}\n" |
| 52224 | "\n" |
| 52225 | "/// Constructs a 128-bit floating-point vector of [4 x float]. The lower\n" |
| 52226 | "/// 32 bits of the vector are initialized with the single-precision\n" |
| 52227 | "/// floating-point value loaded from a specified memory location. The upper\n" |
| 52228 | "/// 96 bits are set to zero.\n" |
| 52229 | "///\n" |
| 52230 | "/// \\headerfile <x86intrin.h>\n" |
| 52231 | "///\n" |
| 52232 | "/// This intrinsic corresponds to the <c> VMOVSS / MOVSS </c> instruction.\n" |
| 52233 | "///\n" |
| 52234 | "/// \\param __p\n" |
| 52235 | "/// A pointer to a 32-bit memory location containing a single-precision\n" |
| 52236 | "/// floating-point value.\n" |
| 52237 | "/// \\returns An initialized 128-bit floating-point vector of [4 x float]. The\n" |
| 52238 | "/// lower 32 bits contain the value loaded from the memory location. The\n" |
| 52239 | "/// upper 96 bits are set to zero.\n" |
| 52240 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 52241 | "_mm_load_ss(const float *__p)\n" |
| 52242 | "{\n" |
| 52243 | " struct __mm_load_ss_struct {\n" |
| 52244 | " float __u;\n" |
| 52245 | " } __attribute__((__packed__, __may_alias__));\n" |
| 52246 | " float __u = ((struct __mm_load_ss_struct*)__p)->__u;\n" |
| 52247 | " return __extension__ (__m128){ __u, 0, 0, 0 };\n" |
| 52248 | "}\n" |
| 52249 | "\n" |
| 52250 | "/// Loads a 32-bit float value and duplicates it to all four vector\n" |
| 52251 | "/// elements of a 128-bit vector of [4 x float].\n" |
| 52252 | "///\n" |
| 52253 | "/// \\headerfile <x86intrin.h>\n" |
| 52254 | "///\n" |
| 52255 | "/// This intrinsic corresponds to the <c> VBROADCASTSS / MOVSS + shuffling </c>\n" |
| 52256 | "/// instruction.\n" |
| 52257 | "///\n" |
| 52258 | "/// \\param __p\n" |
| 52259 | "/// A pointer to a float value to be loaded and duplicated.\n" |
| 52260 | "/// \\returns A 128-bit vector of [4 x float] containing the loaded and\n" |
| 52261 | "/// duplicated values.\n" |
| 52262 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 52263 | "_mm_load1_ps(const float *__p)\n" |
| 52264 | "{\n" |
| 52265 | " struct __mm_load1_ps_struct {\n" |
| 52266 | " float __u;\n" |
| 52267 | " } __attribute__((__packed__, __may_alias__));\n" |
| 52268 | " float __u = ((struct __mm_load1_ps_struct*)__p)->__u;\n" |
| 52269 | " return __extension__ (__m128){ __u, __u, __u, __u };\n" |
| 52270 | "}\n" |
| 52271 | "\n" |
| 52272 | "#define _mm_load_ps1(p) _mm_load1_ps(p)\n" |
| 52273 | "\n" |
| 52274 | "/// Loads a 128-bit floating-point vector of [4 x float] from an aligned\n" |
| 52275 | "/// memory location.\n" |
| 52276 | "///\n" |
| 52277 | "/// \\headerfile <x86intrin.h>\n" |
| 52278 | "///\n" |
| 52279 | "/// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS </c> instruction.\n" |
| 52280 | "///\n" |
| 52281 | "/// \\param __p\n" |
| 52282 | "/// A pointer to a 128-bit memory location. The address of the memory\n" |
| 52283 | "/// location has to be 128-bit aligned.\n" |
| 52284 | "/// \\returns A 128-bit vector of [4 x float] containing the loaded values.\n" |
| 52285 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 52286 | "_mm_load_ps(const float *__p)\n" |
| 52287 | "{\n" |
| 52288 | " return *(__m128*)__p;\n" |
| 52289 | "}\n" |
| 52290 | "\n" |
| 52291 | "/// Loads a 128-bit floating-point vector of [4 x float] from an\n" |
| 52292 | "/// unaligned memory location.\n" |
| 52293 | "///\n" |
| 52294 | "/// \\headerfile <x86intrin.h>\n" |
| 52295 | "///\n" |
| 52296 | "/// This intrinsic corresponds to the <c> VMOVUPS / MOVUPS </c> instruction.\n" |
| 52297 | "///\n" |
| 52298 | "/// \\param __p\n" |
| 52299 | "/// A pointer to a 128-bit memory location. The address of the memory\n" |
| 52300 | "/// location does not have to be aligned.\n" |
| 52301 | "/// \\returns A 128-bit vector of [4 x float] containing the loaded values.\n" |
| 52302 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 52303 | "_mm_loadu_ps(const float *__p)\n" |
| 52304 | "{\n" |
| 52305 | " struct __loadu_ps {\n" |
| 52306 | " __m128 __v;\n" |
| 52307 | " } __attribute__((__packed__, __may_alias__));\n" |
| 52308 | " return ((struct __loadu_ps*)__p)->__v;\n" |
| 52309 | "}\n" |
| 52310 | "\n" |
| 52311 | "/// Loads four packed float values, in reverse order, from an aligned\n" |
| 52312 | "/// memory location to 32-bit elements in a 128-bit vector of [4 x float].\n" |
| 52313 | "///\n" |
| 52314 | "/// \\headerfile <x86intrin.h>\n" |
| 52315 | "///\n" |
| 52316 | "/// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS + shuffling </c>\n" |
| 52317 | "/// instruction.\n" |
| 52318 | "///\n" |
| 52319 | "/// \\param __p\n" |
| 52320 | "/// A pointer to a 128-bit memory location. The address of the memory\n" |
| 52321 | "/// location has to be 128-bit aligned.\n" |
| 52322 | "/// \\returns A 128-bit vector of [4 x float] containing the moved values, loaded\n" |
| 52323 | "/// in reverse order.\n" |
| 52324 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 52325 | "_mm_loadr_ps(const float *__p)\n" |
| 52326 | "{\n" |
| 52327 | " __m128 __a = _mm_load_ps(__p);\n" |
| 52328 | " return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 3, 2, 1, 0);\n" |
| 52329 | "}\n" |
| 52330 | "\n" |
| 52331 | "/// Create a 128-bit vector of [4 x float] with undefined values.\n" |
| 52332 | "///\n" |
| 52333 | "/// \\headerfile <x86intrin.h>\n" |
| 52334 | "///\n" |
| 52335 | "/// This intrinsic has no corresponding instruction.\n" |
| 52336 | "///\n" |
| 52337 | "/// \\returns A 128-bit vector of [4 x float] containing undefined values.\n" |
| 52338 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 52339 | "_mm_undefined_ps(void)\n" |
| 52340 | "{\n" |
| 52341 | " return (__m128)__builtin_ia32_undef128();\n" |
| 52342 | "}\n" |
| 52343 | "\n" |
| 52344 | "/// Constructs a 128-bit floating-point vector of [4 x float]. The lower\n" |
| 52345 | "/// 32 bits of the vector are initialized with the specified single-precision\n" |
| 52346 | "/// floating-point value. The upper 96 bits are set to zero.\n" |
| 52347 | "///\n" |
| 52348 | "/// \\headerfile <x86intrin.h>\n" |
| 52349 | "///\n" |
| 52350 | "/// This intrinsic corresponds to the <c> VMOVSS / MOVSS </c> instruction.\n" |
| 52351 | "///\n" |
| 52352 | "/// \\param __w\n" |
| 52353 | "/// A single-precision floating-point value used to initialize the lower 32\n" |
| 52354 | "/// bits of the result.\n" |
| 52355 | "/// \\returns An initialized 128-bit floating-point vector of [4 x float]. The\n" |
| 52356 | "/// lower 32 bits contain the value provided in the source operand. The\n" |
| 52357 | "/// upper 96 bits are set to zero.\n" |
| 52358 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 52359 | "_mm_set_ss(float __w)\n" |
| 52360 | "{\n" |
| 52361 | " return __extension__ (__m128){ __w, 0, 0, 0 };\n" |
| 52362 | "}\n" |
| 52363 | "\n" |
| 52364 | "/// Constructs a 128-bit floating-point vector of [4 x float], with each\n" |
| 52365 | "/// of the four single-precision floating-point vector elements set to the\n" |
| 52366 | "/// specified single-precision floating-point value.\n" |
| 52367 | "///\n" |
| 52368 | "/// \\headerfile <x86intrin.h>\n" |
| 52369 | "///\n" |
| 52370 | "/// This intrinsic corresponds to the <c> VPERMILPS / PERMILPS </c> instruction.\n" |
| 52371 | "///\n" |
| 52372 | "/// \\param __w\n" |
| 52373 | "/// A single-precision floating-point value used to initialize each vector\n" |
| 52374 | "/// element of the result.\n" |
| 52375 | "/// \\returns An initialized 128-bit floating-point vector of [4 x float].\n" |
| 52376 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 52377 | "_mm_set1_ps(float __w)\n" |
| 52378 | "{\n" |
| 52379 | " return __extension__ (__m128){ __w, __w, __w, __w };\n" |
| 52380 | "}\n" |
| 52381 | "\n" |
| 52382 | "/* Microsoft specific. */\n" |
| 52383 | "/// Constructs a 128-bit floating-point vector of [4 x float], with each\n" |
| 52384 | "/// of the four single-precision floating-point vector elements set to the\n" |
| 52385 | "/// specified single-precision floating-point value.\n" |
| 52386 | "///\n" |
| 52387 | "/// \\headerfile <x86intrin.h>\n" |
| 52388 | "///\n" |
| 52389 | "/// This intrinsic corresponds to the <c> VPERMILPS / PERMILPS </c> instruction.\n" |
| 52390 | "///\n" |
| 52391 | "/// \\param __w\n" |
| 52392 | "/// A single-precision floating-point value used to initialize each vector\n" |
| 52393 | "/// element of the result.\n" |
| 52394 | "/// \\returns An initialized 128-bit floating-point vector of [4 x float].\n" |
| 52395 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 52396 | "_mm_set_ps1(float __w)\n" |
| 52397 | "{\n" |
| 52398 | " return _mm_set1_ps(__w);\n" |
| 52399 | "}\n" |
| 52400 | "\n" |
| 52401 | "/// Constructs a 128-bit floating-point vector of [4 x float]\n" |
| 52402 | "/// initialized with the specified single-precision floating-point values.\n" |
| 52403 | "///\n" |
| 52404 | "/// \\headerfile <x86intrin.h>\n" |
| 52405 | "///\n" |
| 52406 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 52407 | "/// instruction.\n" |
| 52408 | "///\n" |
| 52409 | "/// \\param __z\n" |
| 52410 | "/// A single-precision floating-point value used to initialize bits [127:96]\n" |
| 52411 | "/// of the result.\n" |
| 52412 | "/// \\param __y\n" |
| 52413 | "/// A single-precision floating-point value used to initialize bits [95:64]\n" |
| 52414 | "/// of the result.\n" |
| 52415 | "/// \\param __x\n" |
| 52416 | "/// A single-precision floating-point value used to initialize bits [63:32]\n" |
| 52417 | "/// of the result.\n" |
| 52418 | "/// \\param __w\n" |
| 52419 | "/// A single-precision floating-point value used to initialize bits [31:0]\n" |
| 52420 | "/// of the result.\n" |
| 52421 | "/// \\returns An initialized 128-bit floating-point vector of [4 x float].\n" |
| 52422 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 52423 | "_mm_set_ps(float __z, float __y, float __x, float __w)\n" |
| 52424 | "{\n" |
| 52425 | " return __extension__ (__m128){ __w, __x, __y, __z };\n" |
| 52426 | "}\n" |
| 52427 | "\n" |
| 52428 | "/// Constructs a 128-bit floating-point vector of [4 x float],\n" |
| 52429 | "/// initialized in reverse order with the specified 32-bit single-precision\n" |
| 52430 | "/// float-point values.\n" |
| 52431 | "///\n" |
| 52432 | "/// \\headerfile <x86intrin.h>\n" |
| 52433 | "///\n" |
| 52434 | "/// This intrinsic is a utility function and does not correspond to a specific\n" |
| 52435 | "/// instruction.\n" |
| 52436 | "///\n" |
| 52437 | "/// \\param __z\n" |
| 52438 | "/// A single-precision floating-point value used to initialize bits [31:0]\n" |
| 52439 | "/// of the result.\n" |
| 52440 | "/// \\param __y\n" |
| 52441 | "/// A single-precision floating-point value used to initialize bits [63:32]\n" |
| 52442 | "/// of the result.\n" |
| 52443 | "/// \\param __x\n" |
| 52444 | "/// A single-precision floating-point value used to initialize bits [95:64]\n" |
| 52445 | "/// of the result.\n" |
| 52446 | "/// \\param __w\n" |
| 52447 | "/// A single-precision floating-point value used to initialize bits [127:96]\n" |
| 52448 | "/// of the result.\n" |
| 52449 | "/// \\returns An initialized 128-bit floating-point vector of [4 x float].\n" |
| 52450 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 52451 | "_mm_setr_ps(float __z, float __y, float __x, float __w)\n" |
| 52452 | "{\n" |
| 52453 | " return __extension__ (__m128){ __z, __y, __x, __w };\n" |
| 52454 | "}\n" |
| 52455 | "\n" |
| 52456 | "/// Constructs a 128-bit floating-point vector of [4 x float] initialized\n" |
| 52457 | "/// to zero.\n" |
| 52458 | "///\n" |
| 52459 | "/// \\headerfile <x86intrin.h>\n" |
| 52460 | "///\n" |
| 52461 | "/// This intrinsic corresponds to the <c> VXORPS / XORPS </c> instruction.\n" |
| 52462 | "///\n" |
| 52463 | "/// \\returns An initialized 128-bit floating-point vector of [4 x float] with\n" |
| 52464 | "/// all elements set to zero.\n" |
| 52465 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 52466 | "_mm_setzero_ps(void)\n" |
| 52467 | "{\n" |
| 52468 | " return __extension__ (__m128){ 0, 0, 0, 0 };\n" |
| 52469 | "}\n" |
| 52470 | "\n" |
| 52471 | "/// Stores the upper 64 bits of a 128-bit vector of [4 x float] to a\n" |
| 52472 | "/// memory location.\n" |
| 52473 | "///\n" |
| 52474 | "/// \\headerfile <x86intrin.h>\n" |
| 52475 | "///\n" |
| 52476 | "/// This intrinsic corresponds to the <c> VPEXTRQ / PEXTRQ </c> instruction.\n" |
| 52477 | "///\n" |
| 52478 | "/// \\param __p\n" |
| 52479 | "/// A pointer to a 64-bit memory location.\n" |
| 52480 | "/// \\param __a\n" |
| 52481 | "/// A 128-bit vector of [4 x float] containing the values to be stored.\n" |
| 52482 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 52483 | "_mm_storeh_pi(__m64 *__p, __m128 __a)\n" |
| 52484 | "{\n" |
| 52485 | " __builtin_ia32_storehps((__v2si *)__p, (__v4sf)__a);\n" |
| 52486 | "}\n" |
| 52487 | "\n" |
| 52488 | "/// Stores the lower 64 bits of a 128-bit vector of [4 x float] to a\n" |
| 52489 | "/// memory location.\n" |
| 52490 | "///\n" |
| 52491 | "/// \\headerfile <x86intrin.h>\n" |
| 52492 | "///\n" |
| 52493 | "/// This intrinsic corresponds to the <c> VMOVLPS / MOVLPS </c> instruction.\n" |
| 52494 | "///\n" |
| 52495 | "/// \\param __p\n" |
| 52496 | "/// A pointer to a memory location that will receive the float values.\n" |
| 52497 | "/// \\param __a\n" |
| 52498 | "/// A 128-bit vector of [4 x float] containing the values to be stored.\n" |
| 52499 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 52500 | "_mm_storel_pi(__m64 *__p, __m128 __a)\n" |
| 52501 | "{\n" |
| 52502 | " __builtin_ia32_storelps((__v2si *)__p, (__v4sf)__a);\n" |
| 52503 | "}\n" |
| 52504 | "\n" |
| 52505 | "/// Stores the lower 32 bits of a 128-bit vector of [4 x float] to a\n" |
| 52506 | "/// memory location.\n" |
| 52507 | "///\n" |
| 52508 | "/// \\headerfile <x86intrin.h>\n" |
| 52509 | "///\n" |
| 52510 | "/// This intrinsic corresponds to the <c> VMOVSS / MOVSS </c> instruction.\n" |
| 52511 | "///\n" |
| 52512 | "/// \\param __p\n" |
| 52513 | "/// A pointer to a 32-bit memory location.\n" |
| 52514 | "/// \\param __a\n" |
| 52515 | "/// A 128-bit vector of [4 x float] containing the value to be stored.\n" |
| 52516 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 52517 | "_mm_store_ss(float *__p, __m128 __a)\n" |
| 52518 | "{\n" |
| 52519 | " struct __mm_store_ss_struct {\n" |
| 52520 | " float __u;\n" |
| 52521 | " } __attribute__((__packed__, __may_alias__));\n" |
| 52522 | " ((struct __mm_store_ss_struct*)__p)->__u = __a[0];\n" |
| 52523 | "}\n" |
| 52524 | "\n" |
| 52525 | "/// Stores a 128-bit vector of [4 x float] to an unaligned memory\n" |
| 52526 | "/// location.\n" |
| 52527 | "///\n" |
| 52528 | "/// \\headerfile <x86intrin.h>\n" |
| 52529 | "///\n" |
| 52530 | "/// This intrinsic corresponds to the <c> VMOVUPS / MOVUPS </c> instruction.\n" |
| 52531 | "///\n" |
| 52532 | "/// \\param __p\n" |
| 52533 | "/// A pointer to a 128-bit memory location. The address of the memory\n" |
| 52534 | "/// location does not have to be aligned.\n" |
| 52535 | "/// \\param __a\n" |
| 52536 | "/// A 128-bit vector of [4 x float] containing the values to be stored.\n" |
| 52537 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 52538 | "_mm_storeu_ps(float *__p, __m128 __a)\n" |
| 52539 | "{\n" |
| 52540 | " struct __storeu_ps {\n" |
| 52541 | " __m128 __v;\n" |
| 52542 | " } __attribute__((__packed__, __may_alias__));\n" |
| 52543 | " ((struct __storeu_ps*)__p)->__v = __a;\n" |
| 52544 | "}\n" |
| 52545 | "\n" |
| 52546 | "/// Stores a 128-bit vector of [4 x float] into an aligned memory\n" |
| 52547 | "/// location.\n" |
| 52548 | "///\n" |
| 52549 | "/// \\headerfile <x86intrin.h>\n" |
| 52550 | "///\n" |
| 52551 | "/// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS </c> instruction.\n" |
| 52552 | "///\n" |
| 52553 | "/// \\param __p\n" |
| 52554 | "/// A pointer to a 128-bit memory location. The address of the memory\n" |
| 52555 | "/// location has to be 16-byte aligned.\n" |
| 52556 | "/// \\param __a\n" |
| 52557 | "/// A 128-bit vector of [4 x float] containing the values to be stored.\n" |
| 52558 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 52559 | "_mm_store_ps(float *__p, __m128 __a)\n" |
| 52560 | "{\n" |
| 52561 | " *(__m128*)__p = __a;\n" |
| 52562 | "}\n" |
| 52563 | "\n" |
| 52564 | "/// Stores the lower 32 bits of a 128-bit vector of [4 x float] into\n" |
| 52565 | "/// four contiguous elements in an aligned memory location.\n" |
| 52566 | "///\n" |
| 52567 | "/// \\headerfile <x86intrin.h>\n" |
| 52568 | "///\n" |
| 52569 | "/// This intrinsic corresponds to <c> VMOVAPS / MOVAPS + shuffling </c>\n" |
| 52570 | "/// instruction.\n" |
| 52571 | "///\n" |
| 52572 | "/// \\param __p\n" |
| 52573 | "/// A pointer to a 128-bit memory location.\n" |
| 52574 | "/// \\param __a\n" |
| 52575 | "/// A 128-bit vector of [4 x float] whose lower 32 bits are stored to each\n" |
| 52576 | "/// of the four contiguous elements pointed by \\a __p.\n" |
| 52577 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 52578 | "_mm_store1_ps(float *__p, __m128 __a)\n" |
| 52579 | "{\n" |
| 52580 | " __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0);\n" |
| 52581 | " _mm_store_ps(__p, __a);\n" |
| 52582 | "}\n" |
| 52583 | "\n" |
| 52584 | "/// Stores the lower 32 bits of a 128-bit vector of [4 x float] into\n" |
| 52585 | "/// four contiguous elements in an aligned memory location.\n" |
| 52586 | "///\n" |
| 52587 | "/// \\headerfile <x86intrin.h>\n" |
| 52588 | "///\n" |
| 52589 | "/// This intrinsic corresponds to <c> VMOVAPS / MOVAPS + shuffling </c>\n" |
| 52590 | "/// instruction.\n" |
| 52591 | "///\n" |
| 52592 | "/// \\param __p\n" |
| 52593 | "/// A pointer to a 128-bit memory location.\n" |
| 52594 | "/// \\param __a\n" |
| 52595 | "/// A 128-bit vector of [4 x float] whose lower 32 bits are stored to each\n" |
| 52596 | "/// of the four contiguous elements pointed by \\a __p.\n" |
| 52597 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 52598 | "_mm_store_ps1(float *__p, __m128 __a)\n" |
| 52599 | "{\n" |
| 52600 | " _mm_store1_ps(__p, __a);\n" |
| 52601 | "}\n" |
| 52602 | "\n" |
| 52603 | "/// Stores float values from a 128-bit vector of [4 x float] to an\n" |
| 52604 | "/// aligned memory location in reverse order.\n" |
| 52605 | "///\n" |
| 52606 | "/// \\headerfile <x86intrin.h>\n" |
| 52607 | "///\n" |
| 52608 | "/// This intrinsic corresponds to the <c> VMOVAPS / MOVAPS + shuffling </c>\n" |
| 52609 | "/// instruction.\n" |
| 52610 | "///\n" |
| 52611 | "/// \\param __p\n" |
| 52612 | "/// A pointer to a 128-bit memory location. The address of the memory\n" |
| 52613 | "/// location has to be 128-bit aligned.\n" |
| 52614 | "/// \\param __a\n" |
| 52615 | "/// A 128-bit vector of [4 x float] containing the values to be stored.\n" |
| 52616 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 52617 | "_mm_storer_ps(float *__p, __m128 __a)\n" |
| 52618 | "{\n" |
| 52619 | " __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 3, 2, 1, 0);\n" |
| 52620 | " _mm_store_ps(__p, __a);\n" |
| 52621 | "}\n" |
| 52622 | "\n" |
| 52623 | "#define _MM_HINT_ET0 7\n" |
| 52624 | "#define _MM_HINT_ET1 6\n" |
| 52625 | "#define _MM_HINT_T0 3\n" |
| 52626 | "#define _MM_HINT_T1 2\n" |
| 52627 | "#define _MM_HINT_T2 1\n" |
| 52628 | "#define _MM_HINT_NTA 0\n" |
| 52629 | "\n" |
| 52630 | "#ifndef _MSC_VER\n" |
| 52631 | "/* FIXME: We have to #define this because \"sel\" must be a constant integer, and\n" |
| 52632 | " Sema doesn't do any form of constant propagation yet. */\n" |
| 52633 | "\n" |
| 52634 | "/// Loads one cache line of data from the specified address to a location\n" |
| 52635 | "/// closer to the processor.\n" |
| 52636 | "///\n" |
| 52637 | "/// \\headerfile <x86intrin.h>\n" |
| 52638 | "///\n" |
| 52639 | "/// \\code\n" |
| 52640 | "/// void _mm_prefetch(const void * a, const int sel);\n" |
| 52641 | "/// \\endcode\n" |
| 52642 | "///\n" |
| 52643 | "/// This intrinsic corresponds to the <c> PREFETCHNTA </c> instruction.\n" |
| 52644 | "///\n" |
| 52645 | "/// \\param a\n" |
| 52646 | "/// A pointer to a memory location containing a cache line of data.\n" |
| 52647 | "/// \\param sel\n" |
| 52648 | "/// A predefined integer constant specifying the type of prefetch\n" |
| 52649 | "/// operation: \\n\n" |
| 52650 | "/// _MM_HINT_NTA: Move data using the non-temporal access (NTA) hint. The\n" |
| 52651 | "/// PREFETCHNTA instruction will be generated. \\n\n" |
| 52652 | "/// _MM_HINT_T0: Move data using the T0 hint. The PREFETCHT0 instruction will\n" |
| 52653 | "/// be generated. \\n\n" |
| 52654 | "/// _MM_HINT_T1: Move data using the T1 hint. The PREFETCHT1 instruction will\n" |
| 52655 | "/// be generated. \\n\n" |
| 52656 | "/// _MM_HINT_T2: Move data using the T2 hint. The PREFETCHT2 instruction will\n" |
| 52657 | "/// be generated.\n" |
| 52658 | "#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), \\\n" |
| 52659 | " ((sel) >> 2) & 1, (sel) & 0x3))\n" |
| 52660 | "#endif\n" |
| 52661 | "\n" |
| 52662 | "/// Stores a 64-bit integer in the specified aligned memory location. To\n" |
| 52663 | "/// minimize caching, the data is flagged as non-temporal (unlikely to be\n" |
| 52664 | "/// used again soon).\n" |
| 52665 | "///\n" |
| 52666 | "/// \\headerfile <x86intrin.h>\n" |
| 52667 | "///\n" |
| 52668 | "/// This intrinsic corresponds to the <c> MOVNTQ </c> instruction.\n" |
| 52669 | "///\n" |
| 52670 | "/// \\param __p\n" |
| 52671 | "/// A pointer to an aligned memory location used to store the register value.\n" |
| 52672 | "/// \\param __a\n" |
| 52673 | "/// A 64-bit integer containing the value to be stored.\n" |
| 52674 | "static __inline__ void __DEFAULT_FN_ATTRS_MMX\n" |
| 52675 | "_mm_stream_pi(__m64 *__p, __m64 __a)\n" |
| 52676 | "{\n" |
| 52677 | " __builtin_ia32_movntq(__p, __a);\n" |
| 52678 | "}\n" |
| 52679 | "\n" |
| 52680 | "/// Moves packed float values from a 128-bit vector of [4 x float] to a\n" |
| 52681 | "/// 128-bit aligned memory location. To minimize caching, the data is flagged\n" |
| 52682 | "/// as non-temporal (unlikely to be used again soon).\n" |
| 52683 | "///\n" |
| 52684 | "/// \\headerfile <x86intrin.h>\n" |
| 52685 | "///\n" |
| 52686 | "/// This intrinsic corresponds to the <c> VMOVNTPS / MOVNTPS </c> instruction.\n" |
| 52687 | "///\n" |
| 52688 | "/// \\param __p\n" |
| 52689 | "/// A pointer to a 128-bit aligned memory location that will receive the\n" |
| 52690 | "/// single-precision floating-point values.\n" |
| 52691 | "/// \\param __a\n" |
| 52692 | "/// A 128-bit vector of [4 x float] containing the values to be moved.\n" |
| 52693 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 52694 | "_mm_stream_ps(float *__p, __m128 __a)\n" |
| 52695 | "{\n" |
| 52696 | " __builtin_nontemporal_store((__v4sf)__a, (__v4sf*)__p);\n" |
| 52697 | "}\n" |
| 52698 | "\n" |
| 52699 | "#if defined(__cplusplus)\n" |
| 52700 | "extern \"C\" {\n" |
| 52701 | "#endif\n" |
| 52702 | "\n" |
| 52703 | "/// Forces strong memory ordering (serialization) between store\n" |
| 52704 | "/// instructions preceding this instruction and store instructions following\n" |
| 52705 | "/// this instruction, ensuring the system completes all previous stores\n" |
| 52706 | "/// before executing subsequent stores.\n" |
| 52707 | "///\n" |
| 52708 | "/// \\headerfile <x86intrin.h>\n" |
| 52709 | "///\n" |
| 52710 | "/// This intrinsic corresponds to the <c> SFENCE </c> instruction.\n" |
| 52711 | "///\n" |
| 52712 | "void _mm_sfence(void);\n" |
| 52713 | "\n" |
| 52714 | "#if defined(__cplusplus)\n" |
| 52715 | "} // extern \"C\"\n" |
| 52716 | "#endif\n" |
| 52717 | "\n" |
| 52718 | "/// Extracts 16-bit element from a 64-bit vector of [4 x i16] and\n" |
| 52719 | "/// returns it, as specified by the immediate integer operand.\n" |
| 52720 | "///\n" |
| 52721 | "/// \\headerfile <x86intrin.h>\n" |
| 52722 | "///\n" |
| 52723 | "/// \\code\n" |
| 52724 | "/// int _mm_extract_pi16(__m64 a, int n);\n" |
| 52725 | "/// \\endcode\n" |
| 52726 | "///\n" |
| 52727 | "/// This intrinsic corresponds to the <c> VPEXTRW / PEXTRW </c> instruction.\n" |
| 52728 | "///\n" |
| 52729 | "/// \\param a\n" |
| 52730 | "/// A 64-bit vector of [4 x i16].\n" |
| 52731 | "/// \\param n\n" |
| 52732 | "/// An immediate integer operand that determines which bits are extracted: \\n\n" |
| 52733 | "/// 0: Bits [15:0] are copied to the destination. \\n\n" |
| 52734 | "/// 1: Bits [31:16] are copied to the destination. \\n\n" |
| 52735 | "/// 2: Bits [47:32] are copied to the destination. \\n\n" |
| 52736 | "/// 3: Bits [63:48] are copied to the destination.\n" |
| 52737 | "/// \\returns A 16-bit integer containing the extracted 16 bits of packed data.\n" |
| 52738 | "#define _mm_extract_pi16(a, n) \\\n" |
| 52739 | " (int)__builtin_ia32_vec_ext_v4hi((__m64)a, (int)n)\n" |
| 52740 | "\n" |
| 52741 | "/// Copies data from the 64-bit vector of [4 x i16] to the destination,\n" |
| 52742 | "/// and inserts the lower 16-bits of an integer operand at the 16-bit offset\n" |
| 52743 | "/// specified by the immediate operand \\a n.\n" |
| 52744 | "///\n" |
| 52745 | "/// \\headerfile <x86intrin.h>\n" |
| 52746 | "///\n" |
| 52747 | "/// \\code\n" |
| 52748 | "/// __m64 _mm_insert_pi16(__m64 a, int d, int n);\n" |
| 52749 | "/// \\endcode\n" |
| 52750 | "///\n" |
| 52751 | "/// This intrinsic corresponds to the <c> PINSRW </c> instruction.\n" |
| 52752 | "///\n" |
| 52753 | "/// \\param a\n" |
| 52754 | "/// A 64-bit vector of [4 x i16].\n" |
| 52755 | "/// \\param d\n" |
| 52756 | "/// An integer. The lower 16-bit value from this operand is written to the\n" |
| 52757 | "/// destination at the offset specified by operand \\a n.\n" |
| 52758 | "/// \\param n\n" |
| 52759 | "/// An immediate integer operant that determines which the bits to be used\n" |
| 52760 | "/// in the destination. \\n\n" |
| 52761 | "/// 0: Bits [15:0] are copied to the destination. \\n\n" |
| 52762 | "/// 1: Bits [31:16] are copied to the destination. \\n\n" |
| 52763 | "/// 2: Bits [47:32] are copied to the destination. \\n\n" |
| 52764 | "/// 3: Bits [63:48] are copied to the destination. \\n\n" |
| 52765 | "/// The remaining bits in the destination are copied from the corresponding\n" |
| 52766 | "/// bits in operand \\a a.\n" |
| 52767 | "/// \\returns A 64-bit integer vector containing the copied packed data from the\n" |
| 52768 | "/// operands.\n" |
| 52769 | "#define _mm_insert_pi16(a, d, n) \\\n" |
| 52770 | " (__m64)__builtin_ia32_vec_set_v4hi((__m64)a, (int)d, (int)n)\n" |
| 52771 | "\n" |
| 52772 | "/// Compares each of the corresponding packed 16-bit integer values of\n" |
| 52773 | "/// the 64-bit integer vectors, and writes the greater value to the\n" |
| 52774 | "/// corresponding bits in the destination.\n" |
| 52775 | "///\n" |
| 52776 | "/// \\headerfile <x86intrin.h>\n" |
| 52777 | "///\n" |
| 52778 | "/// This intrinsic corresponds to the <c> PMAXSW </c> instruction.\n" |
| 52779 | "///\n" |
| 52780 | "/// \\param __a\n" |
| 52781 | "/// A 64-bit integer vector containing one of the source operands.\n" |
| 52782 | "/// \\param __b\n" |
| 52783 | "/// A 64-bit integer vector containing one of the source operands.\n" |
| 52784 | "/// \\returns A 64-bit integer vector containing the comparison results.\n" |
| 52785 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 52786 | "_mm_max_pi16(__m64 __a, __m64 __b)\n" |
| 52787 | "{\n" |
| 52788 | " return (__m64)__builtin_ia32_pmaxsw((__v4hi)__a, (__v4hi)__b);\n" |
| 52789 | "}\n" |
| 52790 | "\n" |
| 52791 | "/// Compares each of the corresponding packed 8-bit unsigned integer\n" |
| 52792 | "/// values of the 64-bit integer vectors, and writes the greater value to the\n" |
| 52793 | "/// corresponding bits in the destination.\n" |
| 52794 | "///\n" |
| 52795 | "/// \\headerfile <x86intrin.h>\n" |
| 52796 | "///\n" |
| 52797 | "/// This intrinsic corresponds to the <c> PMAXUB </c> instruction.\n" |
| 52798 | "///\n" |
| 52799 | "/// \\param __a\n" |
| 52800 | "/// A 64-bit integer vector containing one of the source operands.\n" |
| 52801 | "/// \\param __b\n" |
| 52802 | "/// A 64-bit integer vector containing one of the source operands.\n" |
| 52803 | "/// \\returns A 64-bit integer vector containing the comparison results.\n" |
| 52804 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 52805 | "_mm_max_pu8(__m64 __a, __m64 __b)\n" |
| 52806 | "{\n" |
| 52807 | " return (__m64)__builtin_ia32_pmaxub((__v8qi)__a, (__v8qi)__b);\n" |
| 52808 | "}\n" |
| 52809 | "\n" |
| 52810 | "/// Compares each of the corresponding packed 16-bit integer values of\n" |
| 52811 | "/// the 64-bit integer vectors, and writes the lesser value to the\n" |
| 52812 | "/// corresponding bits in the destination.\n" |
| 52813 | "///\n" |
| 52814 | "/// \\headerfile <x86intrin.h>\n" |
| 52815 | "///\n" |
| 52816 | "/// This intrinsic corresponds to the <c> PMINSW </c> instruction.\n" |
| 52817 | "///\n" |
| 52818 | "/// \\param __a\n" |
| 52819 | "/// A 64-bit integer vector containing one of the source operands.\n" |
| 52820 | "/// \\param __b\n" |
| 52821 | "/// A 64-bit integer vector containing one of the source operands.\n" |
| 52822 | "/// \\returns A 64-bit integer vector containing the comparison results.\n" |
| 52823 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 52824 | "_mm_min_pi16(__m64 __a, __m64 __b)\n" |
| 52825 | "{\n" |
| 52826 | " return (__m64)__builtin_ia32_pminsw((__v4hi)__a, (__v4hi)__b);\n" |
| 52827 | "}\n" |
| 52828 | "\n" |
| 52829 | "/// Compares each of the corresponding packed 8-bit unsigned integer\n" |
| 52830 | "/// values of the 64-bit integer vectors, and writes the lesser value to the\n" |
| 52831 | "/// corresponding bits in the destination.\n" |
| 52832 | "///\n" |
| 52833 | "/// \\headerfile <x86intrin.h>\n" |
| 52834 | "///\n" |
| 52835 | "/// This intrinsic corresponds to the <c> PMINUB </c> instruction.\n" |
| 52836 | "///\n" |
| 52837 | "/// \\param __a\n" |
| 52838 | "/// A 64-bit integer vector containing one of the source operands.\n" |
| 52839 | "/// \\param __b\n" |
| 52840 | "/// A 64-bit integer vector containing one of the source operands.\n" |
| 52841 | "/// \\returns A 64-bit integer vector containing the comparison results.\n" |
| 52842 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 52843 | "_mm_min_pu8(__m64 __a, __m64 __b)\n" |
| 52844 | "{\n" |
| 52845 | " return (__m64)__builtin_ia32_pminub((__v8qi)__a, (__v8qi)__b);\n" |
| 52846 | "}\n" |
| 52847 | "\n" |
| 52848 | "/// Takes the most significant bit from each 8-bit element in a 64-bit\n" |
| 52849 | "/// integer vector to create an 8-bit mask value. Zero-extends the value to\n" |
| 52850 | "/// 32-bit integer and writes it to the destination.\n" |
| 52851 | "///\n" |
| 52852 | "/// \\headerfile <x86intrin.h>\n" |
| 52853 | "///\n" |
| 52854 | "/// This intrinsic corresponds to the <c> PMOVMSKB </c> instruction.\n" |
| 52855 | "///\n" |
| 52856 | "/// \\param __a\n" |
| 52857 | "/// A 64-bit integer vector containing the values with bits to be extracted.\n" |
| 52858 | "/// \\returns The most significant bit from each 8-bit element in \\a __a,\n" |
| 52859 | "/// written to bits [7:0].\n" |
| 52860 | "static __inline__ int __DEFAULT_FN_ATTRS_MMX\n" |
| 52861 | "_mm_movemask_pi8(__m64 __a)\n" |
| 52862 | "{\n" |
| 52863 | " return __builtin_ia32_pmovmskb((__v8qi)__a);\n" |
| 52864 | "}\n" |
| 52865 | "\n" |
| 52866 | "/// Multiplies packed 16-bit unsigned integer values and writes the\n" |
| 52867 | "/// high-order 16 bits of each 32-bit product to the corresponding bits in\n" |
| 52868 | "/// the destination.\n" |
| 52869 | "///\n" |
| 52870 | "/// \\headerfile <x86intrin.h>\n" |
| 52871 | "///\n" |
| 52872 | "/// This intrinsic corresponds to the <c> PMULHUW </c> instruction.\n" |
| 52873 | "///\n" |
| 52874 | "/// \\param __a\n" |
| 52875 | "/// A 64-bit integer vector containing one of the source operands.\n" |
| 52876 | "/// \\param __b\n" |
| 52877 | "/// A 64-bit integer vector containing one of the source operands.\n" |
| 52878 | "/// \\returns A 64-bit integer vector containing the products of both operands.\n" |
| 52879 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 52880 | "_mm_mulhi_pu16(__m64 __a, __m64 __b)\n" |
| 52881 | "{\n" |
| 52882 | " return (__m64)__builtin_ia32_pmulhuw((__v4hi)__a, (__v4hi)__b);\n" |
| 52883 | "}\n" |
| 52884 | "\n" |
| 52885 | "/// Shuffles the 4 16-bit integers from a 64-bit integer vector to the\n" |
| 52886 | "/// destination, as specified by the immediate value operand.\n" |
| 52887 | "///\n" |
| 52888 | "/// \\headerfile <x86intrin.h>\n" |
| 52889 | "///\n" |
| 52890 | "/// \\code\n" |
| 52891 | "/// __m64 _mm_shuffle_pi16(__m64 a, const int n);\n" |
| 52892 | "/// \\endcode\n" |
| 52893 | "///\n" |
| 52894 | "/// This intrinsic corresponds to the <c> PSHUFW </c> instruction.\n" |
| 52895 | "///\n" |
| 52896 | "/// \\param a\n" |
| 52897 | "/// A 64-bit integer vector containing the values to be shuffled.\n" |
| 52898 | "/// \\param n\n" |
| 52899 | "/// An immediate value containing an 8-bit value specifying which elements to\n" |
| 52900 | "/// copy from \\a a. The destinations within the 64-bit destination are\n" |
| 52901 | "/// assigned values as follows: \\n\n" |
| 52902 | "/// Bits [1:0] are used to assign values to bits [15:0] in the\n" |
| 52903 | "/// destination. \\n\n" |
| 52904 | "/// Bits [3:2] are used to assign values to bits [31:16] in the\n" |
| 52905 | "/// destination. \\n\n" |
| 52906 | "/// Bits [5:4] are used to assign values to bits [47:32] in the\n" |
| 52907 | "/// destination. \\n\n" |
| 52908 | "/// Bits [7:6] are used to assign values to bits [63:48] in the\n" |
| 52909 | "/// destination. \\n\n" |
| 52910 | "/// Bit value assignments: \\n\n" |
| 52911 | "/// 00: assigned from bits [15:0] of \\a a. \\n\n" |
| 52912 | "/// 01: assigned from bits [31:16] of \\a a. \\n\n" |
| 52913 | "/// 10: assigned from bits [47:32] of \\a a. \\n\n" |
| 52914 | "/// 11: assigned from bits [63:48] of \\a a.\n" |
| 52915 | "/// \\returns A 64-bit integer vector containing the shuffled values.\n" |
| 52916 | "#define _mm_shuffle_pi16(a, n) \\\n" |
| 52917 | " (__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n))\n" |
| 52918 | "\n" |
| 52919 | "/// Conditionally copies the values from each 8-bit element in the first\n" |
| 52920 | "/// 64-bit integer vector operand to the specified memory location, as\n" |
| 52921 | "/// specified by the most significant bit in the corresponding element in the\n" |
| 52922 | "/// second 64-bit integer vector operand.\n" |
| 52923 | "///\n" |
| 52924 | "/// To minimize caching, the data is flagged as non-temporal\n" |
| 52925 | "/// (unlikely to be used again soon).\n" |
| 52926 | "///\n" |
| 52927 | "/// \\headerfile <x86intrin.h>\n" |
| 52928 | "///\n" |
| 52929 | "/// This intrinsic corresponds to the <c> MASKMOVQ </c> instruction.\n" |
| 52930 | "///\n" |
| 52931 | "/// \\param __d\n" |
| 52932 | "/// A 64-bit integer vector containing the values with elements to be copied.\n" |
| 52933 | "/// \\param __n\n" |
| 52934 | "/// A 64-bit integer vector operand. The most significant bit from each 8-bit\n" |
| 52935 | "/// element determines whether the corresponding element in operand \\a __d\n" |
| 52936 | "/// is copied. If the most significant bit of a given element is 1, the\n" |
| 52937 | "/// corresponding element in operand \\a __d is copied.\n" |
| 52938 | "/// \\param __p\n" |
| 52939 | "/// A pointer to a 64-bit memory location that will receive the conditionally\n" |
| 52940 | "/// copied integer values. The address of the memory location does not have\n" |
| 52941 | "/// to be aligned.\n" |
| 52942 | "static __inline__ void __DEFAULT_FN_ATTRS_MMX\n" |
| 52943 | "_mm_maskmove_si64(__m64 __d, __m64 __n, char *__p)\n" |
| 52944 | "{\n" |
| 52945 | " __builtin_ia32_maskmovq((__v8qi)__d, (__v8qi)__n, __p);\n" |
| 52946 | "}\n" |
| 52947 | "\n" |
| 52948 | "/// Computes the rounded averages of the packed unsigned 8-bit integer\n" |
| 52949 | "/// values and writes the averages to the corresponding bits in the\n" |
| 52950 | "/// destination.\n" |
| 52951 | "///\n" |
| 52952 | "/// \\headerfile <x86intrin.h>\n" |
| 52953 | "///\n" |
| 52954 | "/// This intrinsic corresponds to the <c> PAVGB </c> instruction.\n" |
| 52955 | "///\n" |
| 52956 | "/// \\param __a\n" |
| 52957 | "/// A 64-bit integer vector containing one of the source operands.\n" |
| 52958 | "/// \\param __b\n" |
| 52959 | "/// A 64-bit integer vector containing one of the source operands.\n" |
| 52960 | "/// \\returns A 64-bit integer vector containing the averages of both operands.\n" |
| 52961 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 52962 | "_mm_avg_pu8(__m64 __a, __m64 __b)\n" |
| 52963 | "{\n" |
| 52964 | " return (__m64)__builtin_ia32_pavgb((__v8qi)__a, (__v8qi)__b);\n" |
| 52965 | "}\n" |
| 52966 | "\n" |
| 52967 | "/// Computes the rounded averages of the packed unsigned 16-bit integer\n" |
| 52968 | "/// values and writes the averages to the corresponding bits in the\n" |
| 52969 | "/// destination.\n" |
| 52970 | "///\n" |
| 52971 | "/// \\headerfile <x86intrin.h>\n" |
| 52972 | "///\n" |
| 52973 | "/// This intrinsic corresponds to the <c> PAVGW </c> instruction.\n" |
| 52974 | "///\n" |
| 52975 | "/// \\param __a\n" |
| 52976 | "/// A 64-bit integer vector containing one of the source operands.\n" |
| 52977 | "/// \\param __b\n" |
| 52978 | "/// A 64-bit integer vector containing one of the source operands.\n" |
| 52979 | "/// \\returns A 64-bit integer vector containing the averages of both operands.\n" |
| 52980 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 52981 | "_mm_avg_pu16(__m64 __a, __m64 __b)\n" |
| 52982 | "{\n" |
| 52983 | " return (__m64)__builtin_ia32_pavgw((__v4hi)__a, (__v4hi)__b);\n" |
| 52984 | "}\n" |
| 52985 | "\n" |
| 52986 | "/// Subtracts the corresponding 8-bit unsigned integer values of the two\n" |
| 52987 | "/// 64-bit vector operands and computes the absolute value for each of the\n" |
| 52988 | "/// difference. Then sum of the 8 absolute differences is written to the\n" |
| 52989 | "/// bits [15:0] of the destination; the remaining bits [63:16] are cleared.\n" |
| 52990 | "///\n" |
| 52991 | "/// \\headerfile <x86intrin.h>\n" |
| 52992 | "///\n" |
| 52993 | "/// This intrinsic corresponds to the <c> PSADBW </c> instruction.\n" |
| 52994 | "///\n" |
| 52995 | "/// \\param __a\n" |
| 52996 | "/// A 64-bit integer vector containing one of the source operands.\n" |
| 52997 | "/// \\param __b\n" |
| 52998 | "/// A 64-bit integer vector containing one of the source operands.\n" |
| 52999 | "/// \\returns A 64-bit integer vector whose lower 16 bits contain the sums of the\n" |
| 53000 | "/// sets of absolute differences between both operands. The upper bits are\n" |
| 53001 | "/// cleared.\n" |
| 53002 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 53003 | "_mm_sad_pu8(__m64 __a, __m64 __b)\n" |
| 53004 | "{\n" |
| 53005 | " return (__m64)__builtin_ia32_psadbw((__v8qi)__a, (__v8qi)__b);\n" |
| 53006 | "}\n" |
| 53007 | "\n" |
| 53008 | "#if defined(__cplusplus)\n" |
| 53009 | "extern \"C\" {\n" |
| 53010 | "#endif\n" |
| 53011 | "\n" |
| 53012 | "/// Returns the contents of the MXCSR register as a 32-bit unsigned\n" |
| 53013 | "/// integer value.\n" |
| 53014 | "///\n" |
| 53015 | "/// There are several groups of macros associated with this\n" |
| 53016 | "/// intrinsic, including:\n" |
| 53017 | "/// <ul>\n" |
| 53018 | "/// <li>\n" |
| 53019 | "/// For checking exception states: _MM_EXCEPT_INVALID, _MM_EXCEPT_DIV_ZERO,\n" |
| 53020 | "/// _MM_EXCEPT_DENORM, _MM_EXCEPT_OVERFLOW, _MM_EXCEPT_UNDERFLOW,\n" |
| 53021 | "/// _MM_EXCEPT_INEXACT. There is a convenience wrapper\n" |
| 53022 | "/// _MM_GET_EXCEPTION_STATE().\n" |
| 53023 | "/// </li>\n" |
| 53024 | "/// <li>\n" |
| 53025 | "/// For checking exception masks: _MM_MASK_UNDERFLOW, _MM_MASK_OVERFLOW,\n" |
| 53026 | "/// _MM_MASK_INVALID, _MM_MASK_DENORM, _MM_MASK_DIV_ZERO, _MM_MASK_INEXACT.\n" |
| 53027 | "/// There is a convenience wrapper _MM_GET_EXCEPTION_MASK().\n" |
| 53028 | "/// </li>\n" |
| 53029 | "/// <li>\n" |
| 53030 | "/// For checking rounding modes: _MM_ROUND_NEAREST, _MM_ROUND_DOWN,\n" |
| 53031 | "/// _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO. There is a convenience wrapper\n" |
| 53032 | "/// _MM_GET_ROUNDING_MODE().\n" |
| 53033 | "/// </li>\n" |
| 53034 | "/// <li>\n" |
| 53035 | "/// For checking flush-to-zero mode: _MM_FLUSH_ZERO_ON, _MM_FLUSH_ZERO_OFF.\n" |
| 53036 | "/// There is a convenience wrapper _MM_GET_FLUSH_ZERO_MODE().\n" |
| 53037 | "/// </li>\n" |
| 53038 | "/// <li>\n" |
| 53039 | "/// For checking denormals-are-zero mode: _MM_DENORMALS_ZERO_ON,\n" |
| 53040 | "/// _MM_DENORMALS_ZERO_OFF. There is a convenience wrapper\n" |
| 53041 | "/// _MM_GET_DENORMALS_ZERO_MODE().\n" |
| 53042 | "/// </li>\n" |
| 53043 | "/// </ul>\n" |
| 53044 | "///\n" |
| 53045 | "/// For example, the following expression checks if an overflow exception has\n" |
| 53046 | "/// occurred:\n" |
| 53047 | "/// \\code\n" |
| 53048 | "/// ( _mm_getcsr() & _MM_EXCEPT_OVERFLOW )\n" |
| 53049 | "/// \\endcode\n" |
| 53050 | "///\n" |
| 53051 | "/// The following expression gets the current rounding mode:\n" |
| 53052 | "/// \\code\n" |
| 53053 | "/// _MM_GET_ROUNDING_MODE()\n" |
| 53054 | "/// \\endcode\n" |
| 53055 | "///\n" |
| 53056 | "/// \\headerfile <x86intrin.h>\n" |
| 53057 | "///\n" |
| 53058 | "/// This intrinsic corresponds to the <c> VSTMXCSR / STMXCSR </c> instruction.\n" |
| 53059 | "///\n" |
| 53060 | "/// \\returns A 32-bit unsigned integer containing the contents of the MXCSR\n" |
| 53061 | "/// register.\n" |
| 53062 | "unsigned int _mm_getcsr(void);\n" |
| 53063 | "\n" |
| 53064 | "/// Sets the MXCSR register with the 32-bit unsigned integer value.\n" |
| 53065 | "///\n" |
| 53066 | "/// There are several groups of macros associated with this intrinsic,\n" |
| 53067 | "/// including:\n" |
| 53068 | "/// <ul>\n" |
| 53069 | "/// <li>\n" |
| 53070 | "/// For setting exception states: _MM_EXCEPT_INVALID, _MM_EXCEPT_DIV_ZERO,\n" |
| 53071 | "/// _MM_EXCEPT_DENORM, _MM_EXCEPT_OVERFLOW, _MM_EXCEPT_UNDERFLOW,\n" |
| 53072 | "/// _MM_EXCEPT_INEXACT. There is a convenience wrapper\n" |
| 53073 | "/// _MM_SET_EXCEPTION_STATE(x) where x is one of these macros.\n" |
| 53074 | "/// </li>\n" |
| 53075 | "/// <li>\n" |
| 53076 | "/// For setting exception masks: _MM_MASK_UNDERFLOW, _MM_MASK_OVERFLOW,\n" |
| 53077 | "/// _MM_MASK_INVALID, _MM_MASK_DENORM, _MM_MASK_DIV_ZERO, _MM_MASK_INEXACT.\n" |
| 53078 | "/// There is a convenience wrapper _MM_SET_EXCEPTION_MASK(x) where x is one\n" |
| 53079 | "/// of these macros.\n" |
| 53080 | "/// </li>\n" |
| 53081 | "/// <li>\n" |
| 53082 | "/// For setting rounding modes: _MM_ROUND_NEAREST, _MM_ROUND_DOWN,\n" |
| 53083 | "/// _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO. There is a convenience wrapper\n" |
| 53084 | "/// _MM_SET_ROUNDING_MODE(x) where x is one of these macros.\n" |
| 53085 | "/// </li>\n" |
| 53086 | "/// <li>\n" |
| 53087 | "/// For setting flush-to-zero mode: _MM_FLUSH_ZERO_ON, _MM_FLUSH_ZERO_OFF.\n" |
| 53088 | "/// There is a convenience wrapper _MM_SET_FLUSH_ZERO_MODE(x) where x is\n" |
| 53089 | "/// one of these macros.\n" |
| 53090 | "/// </li>\n" |
| 53091 | "/// <li>\n" |
| 53092 | "/// For setting denormals-are-zero mode: _MM_DENORMALS_ZERO_ON,\n" |
| 53093 | "/// _MM_DENORMALS_ZERO_OFF. There is a convenience wrapper\n" |
| 53094 | "/// _MM_SET_DENORMALS_ZERO_MODE(x) where x is one of these macros.\n" |
| 53095 | "/// </li>\n" |
| 53096 | "/// </ul>\n" |
| 53097 | "///\n" |
| 53098 | "/// For example, the following expression causes subsequent floating-point\n" |
| 53099 | "/// operations to round up:\n" |
| 53100 | "/// _mm_setcsr(_mm_getcsr() | _MM_ROUND_UP)\n" |
| 53101 | "///\n" |
| 53102 | "/// The following example sets the DAZ and FTZ flags:\n" |
| 53103 | "/// \\code\n" |
| 53104 | "/// void setFlags() {\n" |
| 53105 | "/// _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);\n" |
| 53106 | "/// _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);\n" |
| 53107 | "/// }\n" |
| 53108 | "/// \\endcode\n" |
| 53109 | "///\n" |
| 53110 | "/// \\headerfile <x86intrin.h>\n" |
| 53111 | "///\n" |
| 53112 | "/// This intrinsic corresponds to the <c> VLDMXCSR / LDMXCSR </c> instruction.\n" |
| 53113 | "///\n" |
| 53114 | "/// \\param __i\n" |
| 53115 | "/// A 32-bit unsigned integer value to be written to the MXCSR register.\n" |
| 53116 | "void _mm_setcsr(unsigned int __i);\n" |
| 53117 | "\n" |
| 53118 | "#if defined(__cplusplus)\n" |
| 53119 | "} // extern \"C\"\n" |
| 53120 | "#endif\n" |
| 53121 | "\n" |
| 53122 | "/// Selects 4 float values from the 128-bit operands of [4 x float], as\n" |
| 53123 | "/// specified by the immediate value operand.\n" |
| 53124 | "///\n" |
| 53125 | "/// \\headerfile <x86intrin.h>\n" |
| 53126 | "///\n" |
| 53127 | "/// \\code\n" |
| 53128 | "/// __m128 _mm_shuffle_ps(__m128 a, __m128 b, const int mask);\n" |
| 53129 | "/// \\endcode\n" |
| 53130 | "///\n" |
| 53131 | "/// This intrinsic corresponds to the <c> VSHUFPS / SHUFPS </c> instruction.\n" |
| 53132 | "///\n" |
| 53133 | "/// \\param a\n" |
| 53134 | "/// A 128-bit vector of [4 x float].\n" |
| 53135 | "/// \\param b\n" |
| 53136 | "/// A 128-bit vector of [4 x float].\n" |
| 53137 | "/// \\param mask\n" |
| 53138 | "/// An immediate value containing an 8-bit value specifying which elements to\n" |
| 53139 | "/// copy from \\a a and \\a b. \\n\n" |
| 53140 | "/// Bits [3:0] specify the values copied from operand \\a a. \\n\n" |
| 53141 | "/// Bits [7:4] specify the values copied from operand \\a b. \\n\n" |
| 53142 | "/// The destinations within the 128-bit destination are assigned values as\n" |
| 53143 | "/// follows: \\n\n" |
| 53144 | "/// Bits [1:0] are used to assign values to bits [31:0] in the\n" |
| 53145 | "/// destination. \\n\n" |
| 53146 | "/// Bits [3:2] are used to assign values to bits [63:32] in the\n" |
| 53147 | "/// destination. \\n\n" |
| 53148 | "/// Bits [5:4] are used to assign values to bits [95:64] in the\n" |
| 53149 | "/// destination. \\n\n" |
| 53150 | "/// Bits [7:6] are used to assign values to bits [127:96] in the\n" |
| 53151 | "/// destination. \\n\n" |
| 53152 | "/// Bit value assignments: \\n\n" |
| 53153 | "/// 00: Bits [31:0] copied from the specified operand. \\n\n" |
| 53154 | "/// 01: Bits [63:32] copied from the specified operand. \\n\n" |
| 53155 | "/// 10: Bits [95:64] copied from the specified operand. \\n\n" |
| 53156 | "/// 11: Bits [127:96] copied from the specified operand.\n" |
| 53157 | "/// \\returns A 128-bit vector of [4 x float] containing the shuffled values.\n" |
| 53158 | "#define _mm_shuffle_ps(a, b, mask) \\\n" |
| 53159 | " (__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \\\n" |
| 53160 | " (int)(mask))\n" |
| 53161 | "\n" |
| 53162 | "/// Unpacks the high-order (index 2,3) values from two 128-bit vectors of\n" |
| 53163 | "/// [4 x float] and interleaves them into a 128-bit vector of [4 x float].\n" |
| 53164 | "///\n" |
| 53165 | "/// \\headerfile <x86intrin.h>\n" |
| 53166 | "///\n" |
| 53167 | "/// This intrinsic corresponds to the <c> VUNPCKHPS / UNPCKHPS </c> instruction.\n" |
| 53168 | "///\n" |
| 53169 | "/// \\param __a\n" |
| 53170 | "/// A 128-bit vector of [4 x float]. \\n\n" |
| 53171 | "/// Bits [95:64] are written to bits [31:0] of the destination. \\n\n" |
| 53172 | "/// Bits [127:96] are written to bits [95:64] of the destination.\n" |
| 53173 | "/// \\param __b\n" |
| 53174 | "/// A 128-bit vector of [4 x float].\n" |
| 53175 | "/// Bits [95:64] are written to bits [63:32] of the destination. \\n\n" |
| 53176 | "/// Bits [127:96] are written to bits [127:96] of the destination.\n" |
| 53177 | "/// \\returns A 128-bit vector of [4 x float] containing the interleaved values.\n" |
| 53178 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 53179 | "_mm_unpackhi_ps(__m128 __a, __m128 __b)\n" |
| 53180 | "{\n" |
| 53181 | " return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 2, 6, 3, 7);\n" |
| 53182 | "}\n" |
| 53183 | "\n" |
| 53184 | "/// Unpacks the low-order (index 0,1) values from two 128-bit vectors of\n" |
| 53185 | "/// [4 x float] and interleaves them into a 128-bit vector of [4 x float].\n" |
| 53186 | "///\n" |
| 53187 | "/// \\headerfile <x86intrin.h>\n" |
| 53188 | "///\n" |
| 53189 | "/// This intrinsic corresponds to the <c> VUNPCKLPS / UNPCKLPS </c> instruction.\n" |
| 53190 | "///\n" |
| 53191 | "/// \\param __a\n" |
| 53192 | "/// A 128-bit vector of [4 x float]. \\n\n" |
| 53193 | "/// Bits [31:0] are written to bits [31:0] of the destination. \\n\n" |
| 53194 | "/// Bits [63:32] are written to bits [95:64] of the destination.\n" |
| 53195 | "/// \\param __b\n" |
| 53196 | "/// A 128-bit vector of [4 x float]. \\n\n" |
| 53197 | "/// Bits [31:0] are written to bits [63:32] of the destination. \\n\n" |
| 53198 | "/// Bits [63:32] are written to bits [127:96] of the destination.\n" |
| 53199 | "/// \\returns A 128-bit vector of [4 x float] containing the interleaved values.\n" |
| 53200 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 53201 | "_mm_unpacklo_ps(__m128 __a, __m128 __b)\n" |
| 53202 | "{\n" |
| 53203 | " return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 4, 1, 5);\n" |
| 53204 | "}\n" |
| 53205 | "\n" |
| 53206 | "/// Constructs a 128-bit floating-point vector of [4 x float]. The lower\n" |
| 53207 | "/// 32 bits are set to the lower 32 bits of the second parameter. The upper\n" |
| 53208 | "/// 96 bits are set to the upper 96 bits of the first parameter.\n" |
| 53209 | "///\n" |
| 53210 | "/// \\headerfile <x86intrin.h>\n" |
| 53211 | "///\n" |
| 53212 | "/// This intrinsic corresponds to the <c> VBLENDPS / BLENDPS / MOVSS </c>\n" |
| 53213 | "/// instruction.\n" |
| 53214 | "///\n" |
| 53215 | "/// \\param __a\n" |
| 53216 | "/// A 128-bit floating-point vector of [4 x float]. The upper 96 bits are\n" |
| 53217 | "/// written to the upper 96 bits of the result.\n" |
| 53218 | "/// \\param __b\n" |
| 53219 | "/// A 128-bit floating-point vector of [4 x float]. The lower 32 bits are\n" |
| 53220 | "/// written to the lower 32 bits of the result.\n" |
| 53221 | "/// \\returns A 128-bit floating-point vector of [4 x float].\n" |
| 53222 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 53223 | "_mm_move_ss(__m128 __a, __m128 __b)\n" |
| 53224 | "{\n" |
| 53225 | " __a[0] = __b[0];\n" |
| 53226 | " return __a;\n" |
| 53227 | "}\n" |
| 53228 | "\n" |
| 53229 | "/// Constructs a 128-bit floating-point vector of [4 x float]. The lower\n" |
| 53230 | "/// 64 bits are set to the upper 64 bits of the second parameter. The upper\n" |
| 53231 | "/// 64 bits are set to the upper 64 bits of the first parameter.\n" |
| 53232 | "///\n" |
| 53233 | "/// \\headerfile <x86intrin.h>\n" |
| 53234 | "///\n" |
| 53235 | "/// This intrinsic corresponds to the <c> VUNPCKHPD / UNPCKHPD </c> instruction.\n" |
| 53236 | "///\n" |
| 53237 | "/// \\param __a\n" |
| 53238 | "/// A 128-bit floating-point vector of [4 x float]. The upper 64 bits are\n" |
| 53239 | "/// written to the upper 64 bits of the result.\n" |
| 53240 | "/// \\param __b\n" |
| 53241 | "/// A 128-bit floating-point vector of [4 x float]. The upper 64 bits are\n" |
| 53242 | "/// written to the lower 64 bits of the result.\n" |
| 53243 | "/// \\returns A 128-bit floating-point vector of [4 x float].\n" |
| 53244 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 53245 | "_mm_movehl_ps(__m128 __a, __m128 __b)\n" |
| 53246 | "{\n" |
| 53247 | " return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 6, 7, 2, 3);\n" |
| 53248 | "}\n" |
| 53249 | "\n" |
| 53250 | "/// Constructs a 128-bit floating-point vector of [4 x float]. The lower\n" |
| 53251 | "/// 64 bits are set to the lower 64 bits of the first parameter. The upper\n" |
| 53252 | "/// 64 bits are set to the lower 64 bits of the second parameter.\n" |
| 53253 | "///\n" |
| 53254 | "/// \\headerfile <x86intrin.h>\n" |
| 53255 | "///\n" |
| 53256 | "/// This intrinsic corresponds to the <c> VUNPCKLPD / UNPCKLPD </c> instruction.\n" |
| 53257 | "///\n" |
| 53258 | "/// \\param __a\n" |
| 53259 | "/// A 128-bit floating-point vector of [4 x float]. The lower 64 bits are\n" |
| 53260 | "/// written to the lower 64 bits of the result.\n" |
| 53261 | "/// \\param __b\n" |
| 53262 | "/// A 128-bit floating-point vector of [4 x float]. The lower 64 bits are\n" |
| 53263 | "/// written to the upper 64 bits of the result.\n" |
| 53264 | "/// \\returns A 128-bit floating-point vector of [4 x float].\n" |
| 53265 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 53266 | "_mm_movelh_ps(__m128 __a, __m128 __b)\n" |
| 53267 | "{\n" |
| 53268 | " return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 1, 4, 5);\n" |
| 53269 | "}\n" |
| 53270 | "\n" |
| 53271 | "/// Converts a 64-bit vector of [4 x i16] into a 128-bit vector of [4 x\n" |
| 53272 | "/// float].\n" |
| 53273 | "///\n" |
| 53274 | "/// \\headerfile <x86intrin.h>\n" |
| 53275 | "///\n" |
| 53276 | "/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.\n" |
| 53277 | "///\n" |
| 53278 | "/// \\param __a\n" |
| 53279 | "/// A 64-bit vector of [4 x i16]. The elements of the destination are copied\n" |
| 53280 | "/// from the corresponding elements in this operand.\n" |
| 53281 | "/// \\returns A 128-bit vector of [4 x float] containing the copied and converted\n" |
| 53282 | "/// values from the operand.\n" |
| 53283 | "static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n" |
| 53284 | "_mm_cvtpi16_ps(__m64 __a)\n" |
| 53285 | "{\n" |
| 53286 | " __m64 __b, __c;\n" |
| 53287 | " __m128 __r;\n" |
| 53288 | "\n" |
| 53289 | " __b = _mm_setzero_si64();\n" |
| 53290 | " __b = _mm_cmpgt_pi16(__b, __a);\n" |
| 53291 | " __c = _mm_unpackhi_pi16(__a, __b);\n" |
| 53292 | " __r = _mm_setzero_ps();\n" |
| 53293 | " __r = _mm_cvtpi32_ps(__r, __c);\n" |
| 53294 | " __r = _mm_movelh_ps(__r, __r);\n" |
| 53295 | " __c = _mm_unpacklo_pi16(__a, __b);\n" |
| 53296 | " __r = _mm_cvtpi32_ps(__r, __c);\n" |
| 53297 | "\n" |
| 53298 | " return __r;\n" |
| 53299 | "}\n" |
| 53300 | "\n" |
| 53301 | "/// Converts a 64-bit vector of 16-bit unsigned integer values into a\n" |
| 53302 | "/// 128-bit vector of [4 x float].\n" |
| 53303 | "///\n" |
| 53304 | "/// \\headerfile <x86intrin.h>\n" |
| 53305 | "///\n" |
| 53306 | "/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.\n" |
| 53307 | "///\n" |
| 53308 | "/// \\param __a\n" |
| 53309 | "/// A 64-bit vector of 16-bit unsigned integer values. The elements of the\n" |
| 53310 | "/// destination are copied from the corresponding elements in this operand.\n" |
| 53311 | "/// \\returns A 128-bit vector of [4 x float] containing the copied and converted\n" |
| 53312 | "/// values from the operand.\n" |
| 53313 | "static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n" |
| 53314 | "_mm_cvtpu16_ps(__m64 __a)\n" |
| 53315 | "{\n" |
| 53316 | " __m64 __b, __c;\n" |
| 53317 | " __m128 __r;\n" |
| 53318 | "\n" |
| 53319 | " __b = _mm_setzero_si64();\n" |
| 53320 | " __c = _mm_unpackhi_pi16(__a, __b);\n" |
| 53321 | " __r = _mm_setzero_ps();\n" |
| 53322 | " __r = _mm_cvtpi32_ps(__r, __c);\n" |
| 53323 | " __r = _mm_movelh_ps(__r, __r);\n" |
| 53324 | " __c = _mm_unpacklo_pi16(__a, __b);\n" |
| 53325 | " __r = _mm_cvtpi32_ps(__r, __c);\n" |
| 53326 | "\n" |
| 53327 | " return __r;\n" |
| 53328 | "}\n" |
| 53329 | "\n" |
| 53330 | "/// Converts the lower four 8-bit values from a 64-bit vector of [8 x i8]\n" |
| 53331 | "/// into a 128-bit vector of [4 x float].\n" |
| 53332 | "///\n" |
| 53333 | "/// \\headerfile <x86intrin.h>\n" |
| 53334 | "///\n" |
| 53335 | "/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.\n" |
| 53336 | "///\n" |
| 53337 | "/// \\param __a\n" |
| 53338 | "/// A 64-bit vector of [8 x i8]. The elements of the destination are copied\n" |
| 53339 | "/// from the corresponding lower 4 elements in this operand.\n" |
| 53340 | "/// \\returns A 128-bit vector of [4 x float] containing the copied and converted\n" |
| 53341 | "/// values from the operand.\n" |
| 53342 | "static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n" |
| 53343 | "_mm_cvtpi8_ps(__m64 __a)\n" |
| 53344 | "{\n" |
| 53345 | " __m64 __b;\n" |
| 53346 | "\n" |
| 53347 | " __b = _mm_setzero_si64();\n" |
| 53348 | " __b = _mm_cmpgt_pi8(__b, __a);\n" |
| 53349 | " __b = _mm_unpacklo_pi8(__a, __b);\n" |
| 53350 | "\n" |
| 53351 | " return _mm_cvtpi16_ps(__b);\n" |
| 53352 | "}\n" |
| 53353 | "\n" |
| 53354 | "/// Converts the lower four unsigned 8-bit integer values from a 64-bit\n" |
| 53355 | "/// vector of [8 x u8] into a 128-bit vector of [4 x float].\n" |
| 53356 | "///\n" |
| 53357 | "/// \\headerfile <x86intrin.h>\n" |
| 53358 | "///\n" |
| 53359 | "/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.\n" |
| 53360 | "///\n" |
| 53361 | "/// \\param __a\n" |
| 53362 | "/// A 64-bit vector of unsigned 8-bit integer values. The elements of the\n" |
| 53363 | "/// destination are copied from the corresponding lower 4 elements in this\n" |
| 53364 | "/// operand.\n" |
| 53365 | "/// \\returns A 128-bit vector of [4 x float] containing the copied and converted\n" |
| 53366 | "/// values from the source operand.\n" |
| 53367 | "static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n" |
| 53368 | "_mm_cvtpu8_ps(__m64 __a)\n" |
| 53369 | "{\n" |
| 53370 | " __m64 __b;\n" |
| 53371 | "\n" |
| 53372 | " __b = _mm_setzero_si64();\n" |
| 53373 | " __b = _mm_unpacklo_pi8(__a, __b);\n" |
| 53374 | "\n" |
| 53375 | " return _mm_cvtpi16_ps(__b);\n" |
| 53376 | "}\n" |
| 53377 | "\n" |
| 53378 | "/// Converts the two 32-bit signed integer values from each 64-bit vector\n" |
| 53379 | "/// operand of [2 x i32] into a 128-bit vector of [4 x float].\n" |
| 53380 | "///\n" |
| 53381 | "/// \\headerfile <x86intrin.h>\n" |
| 53382 | "///\n" |
| 53383 | "/// This intrinsic corresponds to the <c> CVTPI2PS + COMPOSITE </c> instruction.\n" |
| 53384 | "///\n" |
| 53385 | "/// \\param __a\n" |
| 53386 | "/// A 64-bit vector of [2 x i32]. The lower elements of the destination are\n" |
| 53387 | "/// copied from the elements in this operand.\n" |
| 53388 | "/// \\param __b\n" |
| 53389 | "/// A 64-bit vector of [2 x i32]. The upper elements of the destination are\n" |
| 53390 | "/// copied from the elements in this operand.\n" |
| 53391 | "/// \\returns A 128-bit vector of [4 x float] whose lower 64 bits contain the\n" |
| 53392 | "/// copied and converted values from the first operand. The upper 64 bits\n" |
| 53393 | "/// contain the copied and converted values from the second operand.\n" |
| 53394 | "static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX\n" |
| 53395 | "_mm_cvtpi32x2_ps(__m64 __a, __m64 __b)\n" |
| 53396 | "{\n" |
| 53397 | " __m128 __c;\n" |
| 53398 | "\n" |
| 53399 | " __c = _mm_setzero_ps();\n" |
| 53400 | " __c = _mm_cvtpi32_ps(__c, __b);\n" |
| 53401 | " __c = _mm_movelh_ps(__c, __c);\n" |
| 53402 | "\n" |
| 53403 | " return _mm_cvtpi32_ps(__c, __a);\n" |
| 53404 | "}\n" |
| 53405 | "\n" |
| 53406 | "/// Converts each single-precision floating-point element of a 128-bit\n" |
| 53407 | "/// floating-point vector of [4 x float] into a 16-bit signed integer, and\n" |
| 53408 | "/// packs the results into a 64-bit integer vector of [4 x i16].\n" |
| 53409 | "///\n" |
| 53410 | "/// If the floating-point element is NaN or infinity, or if the\n" |
| 53411 | "/// floating-point element is greater than 0x7FFFFFFF or less than -0x8000,\n" |
| 53412 | "/// it is converted to 0x8000. Otherwise if the floating-point element is\n" |
| 53413 | "/// greater than 0x7FFF, it is converted to 0x7FFF.\n" |
| 53414 | "///\n" |
| 53415 | "/// \\headerfile <x86intrin.h>\n" |
| 53416 | "///\n" |
| 53417 | "/// This intrinsic corresponds to the <c> CVTPS2PI + COMPOSITE </c> instruction.\n" |
| 53418 | "///\n" |
| 53419 | "/// \\param __a\n" |
| 53420 | "/// A 128-bit floating-point vector of [4 x float].\n" |
| 53421 | "/// \\returns A 64-bit integer vector of [4 x i16] containing the converted\n" |
| 53422 | "/// values.\n" |
| 53423 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 53424 | "_mm_cvtps_pi16(__m128 __a)\n" |
| 53425 | "{\n" |
| 53426 | " __m64 __b, __c;\n" |
| 53427 | "\n" |
| 53428 | " __b = _mm_cvtps_pi32(__a);\n" |
| 53429 | " __a = _mm_movehl_ps(__a, __a);\n" |
| 53430 | " __c = _mm_cvtps_pi32(__a);\n" |
| 53431 | "\n" |
| 53432 | " return _mm_packs_pi32(__b, __c);\n" |
| 53433 | "}\n" |
| 53434 | "\n" |
| 53435 | "/// Converts each single-precision floating-point element of a 128-bit\n" |
| 53436 | "/// floating-point vector of [4 x float] into an 8-bit signed integer, and\n" |
| 53437 | "/// packs the results into the lower 32 bits of a 64-bit integer vector of\n" |
| 53438 | "/// [8 x i8]. The upper 32 bits of the vector are set to 0.\n" |
| 53439 | "///\n" |
| 53440 | "/// If the floating-point element is NaN or infinity, or if the\n" |
| 53441 | "/// floating-point element is greater than 0x7FFFFFFF or less than -0x80, it\n" |
| 53442 | "/// is converted to 0x80. Otherwise if the floating-point element is greater\n" |
| 53443 | "/// than 0x7F, it is converted to 0x7F.\n" |
| 53444 | "///\n" |
| 53445 | "/// \\headerfile <x86intrin.h>\n" |
| 53446 | "///\n" |
| 53447 | "/// This intrinsic corresponds to the <c> CVTPS2PI + COMPOSITE </c> instruction.\n" |
| 53448 | "///\n" |
| 53449 | "/// \\param __a\n" |
| 53450 | "/// 128-bit floating-point vector of [4 x float].\n" |
| 53451 | "/// \\returns A 64-bit integer vector of [8 x i8]. The lower 32 bits contain the\n" |
| 53452 | "/// converted values and the uppper 32 bits are set to zero.\n" |
| 53453 | "static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX\n" |
| 53454 | "_mm_cvtps_pi8(__m128 __a)\n" |
| 53455 | "{\n" |
| 53456 | " __m64 __b, __c;\n" |
| 53457 | "\n" |
| 53458 | " __b = _mm_cvtps_pi16(__a);\n" |
| 53459 | " __c = _mm_setzero_si64();\n" |
| 53460 | "\n" |
| 53461 | " return _mm_packs_pi16(__b, __c);\n" |
| 53462 | "}\n" |
| 53463 | "\n" |
| 53464 | "/// Extracts the sign bits from each single-precision floating-point\n" |
| 53465 | "/// element of a 128-bit floating-point vector of [4 x float] and returns the\n" |
| 53466 | "/// sign bits in bits [0:3] of the result. Bits [31:4] of the result are set\n" |
| 53467 | "/// to zero.\n" |
| 53468 | "///\n" |
| 53469 | "/// \\headerfile <x86intrin.h>\n" |
| 53470 | "///\n" |
| 53471 | "/// This intrinsic corresponds to the <c> VMOVMSKPS / MOVMSKPS </c> instruction.\n" |
| 53472 | "///\n" |
| 53473 | "/// \\param __a\n" |
| 53474 | "/// A 128-bit floating-point vector of [4 x float].\n" |
| 53475 | "/// \\returns A 32-bit integer value. Bits [3:0] contain the sign bits from each\n" |
| 53476 | "/// single-precision floating-point element of the parameter. Bits [31:4] are\n" |
| 53477 | "/// set to zero.\n" |
| 53478 | "static __inline__ int __DEFAULT_FN_ATTRS\n" |
| 53479 | "_mm_movemask_ps(__m128 __a)\n" |
| 53480 | "{\n" |
| 53481 | " return __builtin_ia32_movmskps((__v4sf)__a);\n" |
| 53482 | "}\n" |
| 53483 | "\n" |
| 53484 | "\n" |
| 53485 | "#define _MM_ALIGN16 __attribute__((aligned(16)))\n" |
| 53486 | "\n" |
| 53487 | "#define _MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))\n" |
| 53488 | "\n" |
| 53489 | "#define _MM_EXCEPT_INVALID (0x0001)\n" |
| 53490 | "#define _MM_EXCEPT_DENORM (0x0002)\n" |
| 53491 | "#define _MM_EXCEPT_DIV_ZERO (0x0004)\n" |
| 53492 | "#define _MM_EXCEPT_OVERFLOW (0x0008)\n" |
| 53493 | "#define _MM_EXCEPT_UNDERFLOW (0x0010)\n" |
| 53494 | "#define _MM_EXCEPT_INEXACT (0x0020)\n" |
| 53495 | "#define _MM_EXCEPT_MASK (0x003f)\n" |
| 53496 | "\n" |
| 53497 | "#define _MM_MASK_INVALID (0x0080)\n" |
| 53498 | "#define _MM_MASK_DENORM (0x0100)\n" |
| 53499 | "#define _MM_MASK_DIV_ZERO (0x0200)\n" |
| 53500 | "#define _MM_MASK_OVERFLOW (0x0400)\n" |
| 53501 | "#define _MM_MASK_UNDERFLOW (0x0800)\n" |
| 53502 | "#define _MM_MASK_INEXACT (0x1000)\n" |
| 53503 | "#define _MM_MASK_MASK (0x1f80)\n" |
| 53504 | "\n" |
| 53505 | "#define _MM_ROUND_NEAREST (0x0000)\n" |
| 53506 | "#define _MM_ROUND_DOWN (0x2000)\n" |
| 53507 | "#define _MM_ROUND_UP (0x4000)\n" |
| 53508 | "#define _MM_ROUND_TOWARD_ZERO (0x6000)\n" |
| 53509 | "#define _MM_ROUND_MASK (0x6000)\n" |
| 53510 | "\n" |
| 53511 | "#define _MM_FLUSH_ZERO_MASK (0x8000)\n" |
| 53512 | "#define _MM_FLUSH_ZERO_ON (0x8000)\n" |
| 53513 | "#define _MM_FLUSH_ZERO_OFF (0x0000)\n" |
| 53514 | "\n" |
| 53515 | "#define _MM_GET_EXCEPTION_MASK() (_mm_getcsr() & _MM_MASK_MASK)\n" |
| 53516 | "#define _MM_GET_EXCEPTION_STATE() (_mm_getcsr() & _MM_EXCEPT_MASK)\n" |
| 53517 | "#define _MM_GET_FLUSH_ZERO_MODE() (_mm_getcsr() & _MM_FLUSH_ZERO_MASK)\n" |
| 53518 | "#define _MM_GET_ROUNDING_MODE() (_mm_getcsr() & _MM_ROUND_MASK)\n" |
| 53519 | "\n" |
| 53520 | "#define _MM_SET_EXCEPTION_MASK(x) (_mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | (x)))\n" |
| 53521 | "#define _MM_SET_EXCEPTION_STATE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | (x)))\n" |
| 53522 | "#define _MM_SET_FLUSH_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | (x)))\n" |
| 53523 | "#define _MM_SET_ROUNDING_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | (x)))\n" |
| 53524 | "\n" |
| 53525 | "#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \\\n" |
| 53526 | "do { \\\n" |
| 53527 | " __m128 tmp3, tmp2, tmp1, tmp0; \\\n" |
| 53528 | " tmp0 = _mm_unpacklo_ps((row0), (row1)); \\\n" |
| 53529 | " tmp2 = _mm_unpacklo_ps((row2), (row3)); \\\n" |
| 53530 | " tmp1 = _mm_unpackhi_ps((row0), (row1)); \\\n" |
| 53531 | " tmp3 = _mm_unpackhi_ps((row2), (row3)); \\\n" |
| 53532 | " (row0) = _mm_movelh_ps(tmp0, tmp2); \\\n" |
| 53533 | " (row1) = _mm_movehl_ps(tmp2, tmp0); \\\n" |
| 53534 | " (row2) = _mm_movelh_ps(tmp1, tmp3); \\\n" |
| 53535 | " (row3) = _mm_movehl_ps(tmp3, tmp1); \\\n" |
| 53536 | "} while (0)\n" |
| 53537 | "\n" |
| 53538 | "/* Aliases for compatibility. */\n" |
| 53539 | "#define _m_pextrw _mm_extract_pi16\n" |
| 53540 | "#define _m_pinsrw _mm_insert_pi16\n" |
| 53541 | "#define _m_pmaxsw _mm_max_pi16\n" |
| 53542 | "#define _m_pmaxub _mm_max_pu8\n" |
| 53543 | "#define _m_pminsw _mm_min_pi16\n" |
| 53544 | "#define _m_pminub _mm_min_pu8\n" |
| 53545 | "#define _m_pmovmskb _mm_movemask_pi8\n" |
| 53546 | "#define _m_pmulhuw _mm_mulhi_pu16\n" |
| 53547 | "#define _m_pshufw _mm_shuffle_pi16\n" |
| 53548 | "#define _m_maskmovq _mm_maskmove_si64\n" |
| 53549 | "#define _m_pavgb _mm_avg_pu8\n" |
| 53550 | "#define _m_pavgw _mm_avg_pu16\n" |
| 53551 | "#define _m_psadbw _mm_sad_pu8\n" |
| 53552 | "#define _m_ _mm_\n" |
| 53553 | "#define _m_ _mm_\n" |
| 53554 | "\n" |
| 53555 | "#undef __DEFAULT_FN_ATTRS\n" |
| 53556 | "#undef __DEFAULT_FN_ATTRS_MMX\n" |
| 53557 | "\n" |
| 53558 | "/* Ugly hack for backwards-compatibility (compatible with gcc) */\n" |
| 53559 | "#if defined(__SSE2__) && !__building_module(_Builtin_intrinsics)\n" |
| 53560 | "#include <emmintrin.h>\n" |
| 53561 | "#endif\n" |
| 53562 | "\n" |
| 53563 | "#endif /* __XMMINTRIN_H */\n" |
| 53564 | "" } , |
| 53565 | { "/builtins/xopintrin.h" , "/*===---- xopintrin.h - XOP intrinsics -------------------------------------===\n" |
| 53566 | " *\n" |
| 53567 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 53568 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 53569 | " * in the Software without restriction, including without limitation the rights\n" |
| 53570 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 53571 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 53572 | " * furnished to do so, subject to the following conditions:\n" |
| 53573 | " *\n" |
| 53574 | " * The above copyright notice and this permission notice shall be included in\n" |
| 53575 | " * all copies or substantial portions of the Software.\n" |
| 53576 | " *\n" |
| 53577 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 53578 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 53579 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 53580 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 53581 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 53582 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 53583 | " * THE SOFTWARE.\n" |
| 53584 | " *\n" |
| 53585 | " *===-----------------------------------------------------------------------===\n" |
| 53586 | " */\n" |
| 53587 | "\n" |
| 53588 | "#ifndef __X86INTRIN_H\n" |
| 53589 | "#error \"Never use <xopintrin.h> directly; include <x86intrin.h> instead.\"\n" |
| 53590 | "#endif\n" |
| 53591 | "\n" |
| 53592 | "#ifndef __XOPINTRIN_H\n" |
| 53593 | "#define __XOPINTRIN_H\n" |
| 53594 | "\n" |
| 53595 | "#include <fma4intrin.h>\n" |
| 53596 | "\n" |
| 53597 | "/* Define the default attributes for the functions in this file. */\n" |
| 53598 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"xop\"), __min_vector_width__(128)))\n" |
| 53599 | "#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__(\"xop\"), __min_vector_width__(256)))\n" |
| 53600 | "\n" |
| 53601 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53602 | "_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)\n" |
| 53603 | "{\n" |
| 53604 | " return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);\n" |
| 53605 | "}\n" |
| 53606 | "\n" |
| 53607 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53608 | "_mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)\n" |
| 53609 | "{\n" |
| 53610 | " return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);\n" |
| 53611 | "}\n" |
| 53612 | "\n" |
| 53613 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53614 | "_mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)\n" |
| 53615 | "{\n" |
| 53616 | " return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);\n" |
| 53617 | "}\n" |
| 53618 | "\n" |
| 53619 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53620 | "_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)\n" |
| 53621 | "{\n" |
| 53622 | " return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);\n" |
| 53623 | "}\n" |
| 53624 | "\n" |
| 53625 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53626 | "_mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)\n" |
| 53627 | "{\n" |
| 53628 | " return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C);\n" |
| 53629 | "}\n" |
| 53630 | "\n" |
| 53631 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53632 | "_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)\n" |
| 53633 | "{\n" |
| 53634 | " return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C);\n" |
| 53635 | "}\n" |
| 53636 | "\n" |
| 53637 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53638 | "_mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)\n" |
| 53639 | "{\n" |
| 53640 | " return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C);\n" |
| 53641 | "}\n" |
| 53642 | "\n" |
| 53643 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53644 | "_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)\n" |
| 53645 | "{\n" |
| 53646 | " return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C);\n" |
| 53647 | "}\n" |
| 53648 | "\n" |
| 53649 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53650 | "_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)\n" |
| 53651 | "{\n" |
| 53652 | " return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);\n" |
| 53653 | "}\n" |
| 53654 | "\n" |
| 53655 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53656 | "_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)\n" |
| 53657 | "{\n" |
| 53658 | " return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);\n" |
| 53659 | "}\n" |
| 53660 | "\n" |
| 53661 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53662 | "_mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)\n" |
| 53663 | "{\n" |
| 53664 | " return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);\n" |
| 53665 | "}\n" |
| 53666 | "\n" |
| 53667 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53668 | "_mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)\n" |
| 53669 | "{\n" |
| 53670 | " return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);\n" |
| 53671 | "}\n" |
| 53672 | "\n" |
| 53673 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53674 | "_mm_haddw_epi8(__m128i __A)\n" |
| 53675 | "{\n" |
| 53676 | " return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A);\n" |
| 53677 | "}\n" |
| 53678 | "\n" |
| 53679 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53680 | "_mm_haddd_epi8(__m128i __A)\n" |
| 53681 | "{\n" |
| 53682 | " return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A);\n" |
| 53683 | "}\n" |
| 53684 | "\n" |
| 53685 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53686 | "_mm_haddq_epi8(__m128i __A)\n" |
| 53687 | "{\n" |
| 53688 | " return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A);\n" |
| 53689 | "}\n" |
| 53690 | "\n" |
| 53691 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53692 | "_mm_haddd_epi16(__m128i __A)\n" |
| 53693 | "{\n" |
| 53694 | " return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A);\n" |
| 53695 | "}\n" |
| 53696 | "\n" |
| 53697 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53698 | "_mm_haddq_epi16(__m128i __A)\n" |
| 53699 | "{\n" |
| 53700 | " return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A);\n" |
| 53701 | "}\n" |
| 53702 | "\n" |
| 53703 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53704 | "_mm_haddq_epi32(__m128i __A)\n" |
| 53705 | "{\n" |
| 53706 | " return (__m128i)__builtin_ia32_vphadddq((__v4si)__A);\n" |
| 53707 | "}\n" |
| 53708 | "\n" |
| 53709 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53710 | "_mm_haddw_epu8(__m128i __A)\n" |
| 53711 | "{\n" |
| 53712 | " return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A);\n" |
| 53713 | "}\n" |
| 53714 | "\n" |
| 53715 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53716 | "_mm_haddd_epu8(__m128i __A)\n" |
| 53717 | "{\n" |
| 53718 | " return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A);\n" |
| 53719 | "}\n" |
| 53720 | "\n" |
| 53721 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53722 | "_mm_haddq_epu8(__m128i __A)\n" |
| 53723 | "{\n" |
| 53724 | " return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A);\n" |
| 53725 | "}\n" |
| 53726 | "\n" |
| 53727 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53728 | "_mm_haddd_epu16(__m128i __A)\n" |
| 53729 | "{\n" |
| 53730 | " return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A);\n" |
| 53731 | "}\n" |
| 53732 | "\n" |
| 53733 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53734 | "_mm_haddq_epu16(__m128i __A)\n" |
| 53735 | "{\n" |
| 53736 | " return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A);\n" |
| 53737 | "}\n" |
| 53738 | "\n" |
| 53739 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53740 | "_mm_haddq_epu32(__m128i __A)\n" |
| 53741 | "{\n" |
| 53742 | " return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A);\n" |
| 53743 | "}\n" |
| 53744 | "\n" |
| 53745 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53746 | "_mm_hsubw_epi8(__m128i __A)\n" |
| 53747 | "{\n" |
| 53748 | " return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A);\n" |
| 53749 | "}\n" |
| 53750 | "\n" |
| 53751 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53752 | "_mm_hsubd_epi16(__m128i __A)\n" |
| 53753 | "{\n" |
| 53754 | " return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A);\n" |
| 53755 | "}\n" |
| 53756 | "\n" |
| 53757 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53758 | "_mm_hsubq_epi32(__m128i __A)\n" |
| 53759 | "{\n" |
| 53760 | " return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A);\n" |
| 53761 | "}\n" |
| 53762 | "\n" |
| 53763 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53764 | "_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)\n" |
| 53765 | "{\n" |
| 53766 | " return (__m128i)(((__v2du)__A & (__v2du)__C) | ((__v2du)__B & ~(__v2du)__C));\n" |
| 53767 | "}\n" |
| 53768 | "\n" |
| 53769 | "static __inline__ __m256i __DEFAULT_FN_ATTRS256\n" |
| 53770 | "_mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C)\n" |
| 53771 | "{\n" |
| 53772 | " return (__m256i)(((__v4du)__A & (__v4du)__C) | ((__v4du)__B & ~(__v4du)__C));\n" |
| 53773 | "}\n" |
| 53774 | "\n" |
| 53775 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53776 | "_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)\n" |
| 53777 | "{\n" |
| 53778 | " return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);\n" |
| 53779 | "}\n" |
| 53780 | "\n" |
| 53781 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53782 | "_mm_rot_epi8(__m128i __A, __m128i __B)\n" |
| 53783 | "{\n" |
| 53784 | " return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B);\n" |
| 53785 | "}\n" |
| 53786 | "\n" |
| 53787 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53788 | "_mm_rot_epi16(__m128i __A, __m128i __B)\n" |
| 53789 | "{\n" |
| 53790 | " return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B);\n" |
| 53791 | "}\n" |
| 53792 | "\n" |
| 53793 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53794 | "_mm_rot_epi32(__m128i __A, __m128i __B)\n" |
| 53795 | "{\n" |
| 53796 | " return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B);\n" |
| 53797 | "}\n" |
| 53798 | "\n" |
| 53799 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53800 | "_mm_rot_epi64(__m128i __A, __m128i __B)\n" |
| 53801 | "{\n" |
| 53802 | " return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B);\n" |
| 53803 | "}\n" |
| 53804 | "\n" |
| 53805 | "#define _mm_roti_epi8(A, N) \\\n" |
| 53806 | " (__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N))\n" |
| 53807 | "\n" |
| 53808 | "#define _mm_roti_epi16(A, N) \\\n" |
| 53809 | " (__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N))\n" |
| 53810 | "\n" |
| 53811 | "#define _mm_roti_epi32(A, N) \\\n" |
| 53812 | " (__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N))\n" |
| 53813 | "\n" |
| 53814 | "#define _mm_roti_epi64(A, N) \\\n" |
| 53815 | " (__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N))\n" |
| 53816 | "\n" |
| 53817 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53818 | "_mm_shl_epi8(__m128i __A, __m128i __B)\n" |
| 53819 | "{\n" |
| 53820 | " return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B);\n" |
| 53821 | "}\n" |
| 53822 | "\n" |
| 53823 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53824 | "_mm_shl_epi16(__m128i __A, __m128i __B)\n" |
| 53825 | "{\n" |
| 53826 | " return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B);\n" |
| 53827 | "}\n" |
| 53828 | "\n" |
| 53829 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53830 | "_mm_shl_epi32(__m128i __A, __m128i __B)\n" |
| 53831 | "{\n" |
| 53832 | " return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B);\n" |
| 53833 | "}\n" |
| 53834 | "\n" |
| 53835 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53836 | "_mm_shl_epi64(__m128i __A, __m128i __B)\n" |
| 53837 | "{\n" |
| 53838 | " return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B);\n" |
| 53839 | "}\n" |
| 53840 | "\n" |
| 53841 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53842 | "_mm_sha_epi8(__m128i __A, __m128i __B)\n" |
| 53843 | "{\n" |
| 53844 | " return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B);\n" |
| 53845 | "}\n" |
| 53846 | "\n" |
| 53847 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53848 | "_mm_sha_epi16(__m128i __A, __m128i __B)\n" |
| 53849 | "{\n" |
| 53850 | " return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B);\n" |
| 53851 | "}\n" |
| 53852 | "\n" |
| 53853 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53854 | "_mm_sha_epi32(__m128i __A, __m128i __B)\n" |
| 53855 | "{\n" |
| 53856 | " return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B);\n" |
| 53857 | "}\n" |
| 53858 | "\n" |
| 53859 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53860 | "_mm_sha_epi64(__m128i __A, __m128i __B)\n" |
| 53861 | "{\n" |
| 53862 | " return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B);\n" |
| 53863 | "}\n" |
| 53864 | "\n" |
| 53865 | "#define _mm_com_epu8(A, B, N) \\\n" |
| 53866 | " (__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), \\\n" |
| 53867 | " (__v16qi)(__m128i)(B), (N))\n" |
| 53868 | "\n" |
| 53869 | "#define _mm_com_epu16(A, B, N) \\\n" |
| 53870 | " (__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), \\\n" |
| 53871 | " (__v8hi)(__m128i)(B), (N))\n" |
| 53872 | "\n" |
| 53873 | "#define _mm_com_epu32(A, B, N) \\\n" |
| 53874 | " (__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), \\\n" |
| 53875 | " (__v4si)(__m128i)(B), (N))\n" |
| 53876 | "\n" |
| 53877 | "#define _mm_com_epu64(A, B, N) \\\n" |
| 53878 | " (__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), \\\n" |
| 53879 | " (__v2di)(__m128i)(B), (N))\n" |
| 53880 | "\n" |
| 53881 | "#define _mm_com_epi8(A, B, N) \\\n" |
| 53882 | " (__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), \\\n" |
| 53883 | " (__v16qi)(__m128i)(B), (N))\n" |
| 53884 | "\n" |
| 53885 | "#define _mm_com_epi16(A, B, N) \\\n" |
| 53886 | " (__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), \\\n" |
| 53887 | " (__v8hi)(__m128i)(B), (N))\n" |
| 53888 | "\n" |
| 53889 | "#define _mm_com_epi32(A, B, N) \\\n" |
| 53890 | " (__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), \\\n" |
| 53891 | " (__v4si)(__m128i)(B), (N))\n" |
| 53892 | "\n" |
| 53893 | "#define _mm_com_epi64(A, B, N) \\\n" |
| 53894 | " (__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), \\\n" |
| 53895 | " (__v2di)(__m128i)(B), (N))\n" |
| 53896 | "\n" |
| 53897 | "#define _MM_PCOMCTRL_LT 0\n" |
| 53898 | "#define _MM_PCOMCTRL_LE 1\n" |
| 53899 | "#define _MM_PCOMCTRL_GT 2\n" |
| 53900 | "#define _MM_PCOMCTRL_GE 3\n" |
| 53901 | "#define _MM_PCOMCTRL_EQ 4\n" |
| 53902 | "#define _MM_PCOMCTRL_NEQ 5\n" |
| 53903 | "#define _MM_PCOMCTRL_FALSE 6\n" |
| 53904 | "#define _MM_PCOMCTRL_TRUE 7\n" |
| 53905 | "\n" |
| 53906 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53907 | "_mm_comlt_epu8(__m128i __A, __m128i __B)\n" |
| 53908 | "{\n" |
| 53909 | " return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT);\n" |
| 53910 | "}\n" |
| 53911 | "\n" |
| 53912 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53913 | "_mm_comle_epu8(__m128i __A, __m128i __B)\n" |
| 53914 | "{\n" |
| 53915 | " return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE);\n" |
| 53916 | "}\n" |
| 53917 | "\n" |
| 53918 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53919 | "_mm_comgt_epu8(__m128i __A, __m128i __B)\n" |
| 53920 | "{\n" |
| 53921 | " return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT);\n" |
| 53922 | "}\n" |
| 53923 | "\n" |
| 53924 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53925 | "_mm_comge_epu8(__m128i __A, __m128i __B)\n" |
| 53926 | "{\n" |
| 53927 | " return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE);\n" |
| 53928 | "}\n" |
| 53929 | "\n" |
| 53930 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53931 | "_mm_comeq_epu8(__m128i __A, __m128i __B)\n" |
| 53932 | "{\n" |
| 53933 | " return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ);\n" |
| 53934 | "}\n" |
| 53935 | "\n" |
| 53936 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53937 | "_mm_comneq_epu8(__m128i __A, __m128i __B)\n" |
| 53938 | "{\n" |
| 53939 | " return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ);\n" |
| 53940 | "}\n" |
| 53941 | "\n" |
| 53942 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53943 | "_mm_comfalse_epu8(__m128i __A, __m128i __B)\n" |
| 53944 | "{\n" |
| 53945 | " return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE);\n" |
| 53946 | "}\n" |
| 53947 | "\n" |
| 53948 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53949 | "_mm_comtrue_epu8(__m128i __A, __m128i __B)\n" |
| 53950 | "{\n" |
| 53951 | " return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE);\n" |
| 53952 | "}\n" |
| 53953 | "\n" |
| 53954 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53955 | "_mm_comlt_epu16(__m128i __A, __m128i __B)\n" |
| 53956 | "{\n" |
| 53957 | " return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT);\n" |
| 53958 | "}\n" |
| 53959 | "\n" |
| 53960 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53961 | "_mm_comle_epu16(__m128i __A, __m128i __B)\n" |
| 53962 | "{\n" |
| 53963 | " return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE);\n" |
| 53964 | "}\n" |
| 53965 | "\n" |
| 53966 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53967 | "_mm_comgt_epu16(__m128i __A, __m128i __B)\n" |
| 53968 | "{\n" |
| 53969 | " return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT);\n" |
| 53970 | "}\n" |
| 53971 | "\n" |
| 53972 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53973 | "_mm_comge_epu16(__m128i __A, __m128i __B)\n" |
| 53974 | "{\n" |
| 53975 | " return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE);\n" |
| 53976 | "}\n" |
| 53977 | "\n" |
| 53978 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53979 | "_mm_comeq_epu16(__m128i __A, __m128i __B)\n" |
| 53980 | "{\n" |
| 53981 | " return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ);\n" |
| 53982 | "}\n" |
| 53983 | "\n" |
| 53984 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53985 | "_mm_comneq_epu16(__m128i __A, __m128i __B)\n" |
| 53986 | "{\n" |
| 53987 | " return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ);\n" |
| 53988 | "}\n" |
| 53989 | "\n" |
| 53990 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53991 | "_mm_comfalse_epu16(__m128i __A, __m128i __B)\n" |
| 53992 | "{\n" |
| 53993 | " return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE);\n" |
| 53994 | "}\n" |
| 53995 | "\n" |
| 53996 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 53997 | "_mm_comtrue_epu16(__m128i __A, __m128i __B)\n" |
| 53998 | "{\n" |
| 53999 | " return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE);\n" |
| 54000 | "}\n" |
| 54001 | "\n" |
| 54002 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54003 | "_mm_comlt_epu32(__m128i __A, __m128i __B)\n" |
| 54004 | "{\n" |
| 54005 | " return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT);\n" |
| 54006 | "}\n" |
| 54007 | "\n" |
| 54008 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54009 | "_mm_comle_epu32(__m128i __A, __m128i __B)\n" |
| 54010 | "{\n" |
| 54011 | " return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE);\n" |
| 54012 | "}\n" |
| 54013 | "\n" |
| 54014 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54015 | "_mm_comgt_epu32(__m128i __A, __m128i __B)\n" |
| 54016 | "{\n" |
| 54017 | " return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT);\n" |
| 54018 | "}\n" |
| 54019 | "\n" |
| 54020 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54021 | "_mm_comge_epu32(__m128i __A, __m128i __B)\n" |
| 54022 | "{\n" |
| 54023 | " return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE);\n" |
| 54024 | "}\n" |
| 54025 | "\n" |
| 54026 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54027 | "_mm_comeq_epu32(__m128i __A, __m128i __B)\n" |
| 54028 | "{\n" |
| 54029 | " return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ);\n" |
| 54030 | "}\n" |
| 54031 | "\n" |
| 54032 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54033 | "_mm_comneq_epu32(__m128i __A, __m128i __B)\n" |
| 54034 | "{\n" |
| 54035 | " return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ);\n" |
| 54036 | "}\n" |
| 54037 | "\n" |
| 54038 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54039 | "_mm_comfalse_epu32(__m128i __A, __m128i __B)\n" |
| 54040 | "{\n" |
| 54041 | " return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE);\n" |
| 54042 | "}\n" |
| 54043 | "\n" |
| 54044 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54045 | "_mm_comtrue_epu32(__m128i __A, __m128i __B)\n" |
| 54046 | "{\n" |
| 54047 | " return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE);\n" |
| 54048 | "}\n" |
| 54049 | "\n" |
| 54050 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54051 | "_mm_comlt_epu64(__m128i __A, __m128i __B)\n" |
| 54052 | "{\n" |
| 54053 | " return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT);\n" |
| 54054 | "}\n" |
| 54055 | "\n" |
| 54056 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54057 | "_mm_comle_epu64(__m128i __A, __m128i __B)\n" |
| 54058 | "{\n" |
| 54059 | " return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE);\n" |
| 54060 | "}\n" |
| 54061 | "\n" |
| 54062 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54063 | "_mm_comgt_epu64(__m128i __A, __m128i __B)\n" |
| 54064 | "{\n" |
| 54065 | " return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT);\n" |
| 54066 | "}\n" |
| 54067 | "\n" |
| 54068 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54069 | "_mm_comge_epu64(__m128i __A, __m128i __B)\n" |
| 54070 | "{\n" |
| 54071 | " return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE);\n" |
| 54072 | "}\n" |
| 54073 | "\n" |
| 54074 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54075 | "_mm_comeq_epu64(__m128i __A, __m128i __B)\n" |
| 54076 | "{\n" |
| 54077 | " return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ);\n" |
| 54078 | "}\n" |
| 54079 | "\n" |
| 54080 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54081 | "_mm_comneq_epu64(__m128i __A, __m128i __B)\n" |
| 54082 | "{\n" |
| 54083 | " return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ);\n" |
| 54084 | "}\n" |
| 54085 | "\n" |
| 54086 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54087 | "_mm_comfalse_epu64(__m128i __A, __m128i __B)\n" |
| 54088 | "{\n" |
| 54089 | " return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE);\n" |
| 54090 | "}\n" |
| 54091 | "\n" |
| 54092 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54093 | "_mm_comtrue_epu64(__m128i __A, __m128i __B)\n" |
| 54094 | "{\n" |
| 54095 | " return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE);\n" |
| 54096 | "}\n" |
| 54097 | "\n" |
| 54098 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54099 | "_mm_comlt_epi8(__m128i __A, __m128i __B)\n" |
| 54100 | "{\n" |
| 54101 | " return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT);\n" |
| 54102 | "}\n" |
| 54103 | "\n" |
| 54104 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54105 | "_mm_comle_epi8(__m128i __A, __m128i __B)\n" |
| 54106 | "{\n" |
| 54107 | " return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE);\n" |
| 54108 | "}\n" |
| 54109 | "\n" |
| 54110 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54111 | "_mm_comgt_epi8(__m128i __A, __m128i __B)\n" |
| 54112 | "{\n" |
| 54113 | " return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT);\n" |
| 54114 | "}\n" |
| 54115 | "\n" |
| 54116 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54117 | "_mm_comge_epi8(__m128i __A, __m128i __B)\n" |
| 54118 | "{\n" |
| 54119 | " return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE);\n" |
| 54120 | "}\n" |
| 54121 | "\n" |
| 54122 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54123 | "_mm_comeq_epi8(__m128i __A, __m128i __B)\n" |
| 54124 | "{\n" |
| 54125 | " return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ);\n" |
| 54126 | "}\n" |
| 54127 | "\n" |
| 54128 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54129 | "_mm_comneq_epi8(__m128i __A, __m128i __B)\n" |
| 54130 | "{\n" |
| 54131 | " return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ);\n" |
| 54132 | "}\n" |
| 54133 | "\n" |
| 54134 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54135 | "_mm_comfalse_epi8(__m128i __A, __m128i __B)\n" |
| 54136 | "{\n" |
| 54137 | " return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE);\n" |
| 54138 | "}\n" |
| 54139 | "\n" |
| 54140 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54141 | "_mm_comtrue_epi8(__m128i __A, __m128i __B)\n" |
| 54142 | "{\n" |
| 54143 | " return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE);\n" |
| 54144 | "}\n" |
| 54145 | "\n" |
| 54146 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54147 | "_mm_comlt_epi16(__m128i __A, __m128i __B)\n" |
| 54148 | "{\n" |
| 54149 | " return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT);\n" |
| 54150 | "}\n" |
| 54151 | "\n" |
| 54152 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54153 | "_mm_comle_epi16(__m128i __A, __m128i __B)\n" |
| 54154 | "{\n" |
| 54155 | " return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE);\n" |
| 54156 | "}\n" |
| 54157 | "\n" |
| 54158 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54159 | "_mm_comgt_epi16(__m128i __A, __m128i __B)\n" |
| 54160 | "{\n" |
| 54161 | " return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT);\n" |
| 54162 | "}\n" |
| 54163 | "\n" |
| 54164 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54165 | "_mm_comge_epi16(__m128i __A, __m128i __B)\n" |
| 54166 | "{\n" |
| 54167 | " return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE);\n" |
| 54168 | "}\n" |
| 54169 | "\n" |
| 54170 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54171 | "_mm_comeq_epi16(__m128i __A, __m128i __B)\n" |
| 54172 | "{\n" |
| 54173 | " return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ);\n" |
| 54174 | "}\n" |
| 54175 | "\n" |
| 54176 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54177 | "_mm_comneq_epi16(__m128i __A, __m128i __B)\n" |
| 54178 | "{\n" |
| 54179 | " return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ);\n" |
| 54180 | "}\n" |
| 54181 | "\n" |
| 54182 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54183 | "_mm_comfalse_epi16(__m128i __A, __m128i __B)\n" |
| 54184 | "{\n" |
| 54185 | " return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE);\n" |
| 54186 | "}\n" |
| 54187 | "\n" |
| 54188 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54189 | "_mm_comtrue_epi16(__m128i __A, __m128i __B)\n" |
| 54190 | "{\n" |
| 54191 | " return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE);\n" |
| 54192 | "}\n" |
| 54193 | "\n" |
| 54194 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54195 | "_mm_comlt_epi32(__m128i __A, __m128i __B)\n" |
| 54196 | "{\n" |
| 54197 | " return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT);\n" |
| 54198 | "}\n" |
| 54199 | "\n" |
| 54200 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54201 | "_mm_comle_epi32(__m128i __A, __m128i __B)\n" |
| 54202 | "{\n" |
| 54203 | " return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE);\n" |
| 54204 | "}\n" |
| 54205 | "\n" |
| 54206 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54207 | "_mm_comgt_epi32(__m128i __A, __m128i __B)\n" |
| 54208 | "{\n" |
| 54209 | " return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT);\n" |
| 54210 | "}\n" |
| 54211 | "\n" |
| 54212 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54213 | "_mm_comge_epi32(__m128i __A, __m128i __B)\n" |
| 54214 | "{\n" |
| 54215 | " return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE);\n" |
| 54216 | "}\n" |
| 54217 | "\n" |
| 54218 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54219 | "_mm_comeq_epi32(__m128i __A, __m128i __B)\n" |
| 54220 | "{\n" |
| 54221 | " return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ);\n" |
| 54222 | "}\n" |
| 54223 | "\n" |
| 54224 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54225 | "_mm_comneq_epi32(__m128i __A, __m128i __B)\n" |
| 54226 | "{\n" |
| 54227 | " return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ);\n" |
| 54228 | "}\n" |
| 54229 | "\n" |
| 54230 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54231 | "_mm_comfalse_epi32(__m128i __A, __m128i __B)\n" |
| 54232 | "{\n" |
| 54233 | " return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE);\n" |
| 54234 | "}\n" |
| 54235 | "\n" |
| 54236 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54237 | "_mm_comtrue_epi32(__m128i __A, __m128i __B)\n" |
| 54238 | "{\n" |
| 54239 | " return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE);\n" |
| 54240 | "}\n" |
| 54241 | "\n" |
| 54242 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54243 | "_mm_comlt_epi64(__m128i __A, __m128i __B)\n" |
| 54244 | "{\n" |
| 54245 | " return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT);\n" |
| 54246 | "}\n" |
| 54247 | "\n" |
| 54248 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54249 | "_mm_comle_epi64(__m128i __A, __m128i __B)\n" |
| 54250 | "{\n" |
| 54251 | " return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE);\n" |
| 54252 | "}\n" |
| 54253 | "\n" |
| 54254 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54255 | "_mm_comgt_epi64(__m128i __A, __m128i __B)\n" |
| 54256 | "{\n" |
| 54257 | " return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT);\n" |
| 54258 | "}\n" |
| 54259 | "\n" |
| 54260 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54261 | "_mm_comge_epi64(__m128i __A, __m128i __B)\n" |
| 54262 | "{\n" |
| 54263 | " return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE);\n" |
| 54264 | "}\n" |
| 54265 | "\n" |
| 54266 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54267 | "_mm_comeq_epi64(__m128i __A, __m128i __B)\n" |
| 54268 | "{\n" |
| 54269 | " return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ);\n" |
| 54270 | "}\n" |
| 54271 | "\n" |
| 54272 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54273 | "_mm_comneq_epi64(__m128i __A, __m128i __B)\n" |
| 54274 | "{\n" |
| 54275 | " return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ);\n" |
| 54276 | "}\n" |
| 54277 | "\n" |
| 54278 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54279 | "_mm_comfalse_epi64(__m128i __A, __m128i __B)\n" |
| 54280 | "{\n" |
| 54281 | " return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE);\n" |
| 54282 | "}\n" |
| 54283 | "\n" |
| 54284 | "static __inline__ __m128i __DEFAULT_FN_ATTRS\n" |
| 54285 | "_mm_comtrue_epi64(__m128i __A, __m128i __B)\n" |
| 54286 | "{\n" |
| 54287 | " return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE);\n" |
| 54288 | "}\n" |
| 54289 | "\n" |
| 54290 | "#define _mm_permute2_pd(X, Y, C, I) \\\n" |
| 54291 | " (__m128d)__builtin_ia32_vpermil2pd((__v2df)(__m128d)(X), \\\n" |
| 54292 | " (__v2df)(__m128d)(Y), \\\n" |
| 54293 | " (__v2di)(__m128i)(C), (I))\n" |
| 54294 | "\n" |
| 54295 | "#define _mm256_permute2_pd(X, Y, C, I) \\\n" |
| 54296 | " (__m256d)__builtin_ia32_vpermil2pd256((__v4df)(__m256d)(X), \\\n" |
| 54297 | " (__v4df)(__m256d)(Y), \\\n" |
| 54298 | " (__v4di)(__m256i)(C), (I))\n" |
| 54299 | "\n" |
| 54300 | "#define _mm_permute2_ps(X, Y, C, I) \\\n" |
| 54301 | " (__m128)__builtin_ia32_vpermil2ps((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \\\n" |
| 54302 | " (__v4si)(__m128i)(C), (I))\n" |
| 54303 | "\n" |
| 54304 | "#define _mm256_permute2_ps(X, Y, C, I) \\\n" |
| 54305 | " (__m256)__builtin_ia32_vpermil2ps256((__v8sf)(__m256)(X), \\\n" |
| 54306 | " (__v8sf)(__m256)(Y), \\\n" |
| 54307 | " (__v8si)(__m256i)(C), (I))\n" |
| 54308 | "\n" |
| 54309 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 54310 | "_mm_frcz_ss(__m128 __A)\n" |
| 54311 | "{\n" |
| 54312 | " return (__m128)__builtin_ia32_vfrczss((__v4sf)__A);\n" |
| 54313 | "}\n" |
| 54314 | "\n" |
| 54315 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 54316 | "_mm_frcz_sd(__m128d __A)\n" |
| 54317 | "{\n" |
| 54318 | " return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A);\n" |
| 54319 | "}\n" |
| 54320 | "\n" |
| 54321 | "static __inline__ __m128 __DEFAULT_FN_ATTRS\n" |
| 54322 | "_mm_frcz_ps(__m128 __A)\n" |
| 54323 | "{\n" |
| 54324 | " return (__m128)__builtin_ia32_vfrczps((__v4sf)__A);\n" |
| 54325 | "}\n" |
| 54326 | "\n" |
| 54327 | "static __inline__ __m128d __DEFAULT_FN_ATTRS\n" |
| 54328 | "_mm_frcz_pd(__m128d __A)\n" |
| 54329 | "{\n" |
| 54330 | " return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A);\n" |
| 54331 | "}\n" |
| 54332 | "\n" |
| 54333 | "static __inline__ __m256 __DEFAULT_FN_ATTRS256\n" |
| 54334 | "_mm256_frcz_ps(__m256 __A)\n" |
| 54335 | "{\n" |
| 54336 | " return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A);\n" |
| 54337 | "}\n" |
| 54338 | "\n" |
| 54339 | "static __inline__ __m256d __DEFAULT_FN_ATTRS256\n" |
| 54340 | "_mm256_frcz_pd(__m256d __A)\n" |
| 54341 | "{\n" |
| 54342 | " return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A);\n" |
| 54343 | "}\n" |
| 54344 | "\n" |
| 54345 | "#undef __DEFAULT_FN_ATTRS\n" |
| 54346 | "#undef __DEFAULT_FN_ATTRS256\n" |
| 54347 | "\n" |
| 54348 | "#endif /* __XOPINTRIN_H */\n" |
| 54349 | "" } , |
| 54350 | { "/builtins/xsavecintrin.h" , "/*===---- xsavecintrin.h - XSAVEC intrinsic --------------------------------===\n" |
| 54351 | " *\n" |
| 54352 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 54353 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 54354 | " * in the Software without restriction, including without limitation the rights\n" |
| 54355 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 54356 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 54357 | " * furnished to do so, subject to the following conditions:\n" |
| 54358 | " *\n" |
| 54359 | " * The above copyright notice and this permission notice shall be included in\n" |
| 54360 | " * all copies or substantial portions of the Software.\n" |
| 54361 | " *\n" |
| 54362 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 54363 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 54364 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 54365 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 54366 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 54367 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 54368 | " * THE SOFTWARE.\n" |
| 54369 | " *\n" |
| 54370 | " *===-----------------------------------------------------------------------===\n" |
| 54371 | " */\n" |
| 54372 | "\n" |
| 54373 | "#ifndef __IMMINTRIN_H\n" |
| 54374 | "#error \"Never use <xsavecintrin.h> directly; include <immintrin.h> instead.\"\n" |
| 54375 | "#endif\n" |
| 54376 | "\n" |
| 54377 | "#ifndef __XSAVECINTRIN_H\n" |
| 54378 | "#define __XSAVECINTRIN_H\n" |
| 54379 | "\n" |
| 54380 | "/* Define the default attributes for the functions in this file. */\n" |
| 54381 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"xsavec\")))\n" |
| 54382 | "\n" |
| 54383 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 54384 | "_xsavec(void *__p, unsigned long long __m) {\n" |
| 54385 | " __builtin_ia32_xsavec(__p, __m);\n" |
| 54386 | "}\n" |
| 54387 | "\n" |
| 54388 | "#ifdef __x86_64__\n" |
| 54389 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 54390 | "_xsavec64(void *__p, unsigned long long __m) {\n" |
| 54391 | " __builtin_ia32_xsavec64(__p, __m);\n" |
| 54392 | "}\n" |
| 54393 | "#endif\n" |
| 54394 | "\n" |
| 54395 | "#undef __DEFAULT_FN_ATTRS\n" |
| 54396 | "\n" |
| 54397 | "#endif\n" |
| 54398 | "" } , |
| 54399 | { "/builtins/xsaveintrin.h" , "/*===---- xsaveintrin.h - XSAVE intrinsic ----------------------------------===\n" |
| 54400 | " *\n" |
| 54401 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 54402 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 54403 | " * in the Software without restriction, including without limitation the rights\n" |
| 54404 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 54405 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 54406 | " * furnished to do so, subject to the following conditions:\n" |
| 54407 | " *\n" |
| 54408 | " * The above copyright notice and this permission notice shall be included in\n" |
| 54409 | " * all copies or substantial portions of the Software.\n" |
| 54410 | " *\n" |
| 54411 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 54412 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 54413 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 54414 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 54415 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 54416 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 54417 | " * THE SOFTWARE.\n" |
| 54418 | " *\n" |
| 54419 | " *===-----------------------------------------------------------------------===\n" |
| 54420 | " */\n" |
| 54421 | "\n" |
| 54422 | "#ifndef __IMMINTRIN_H\n" |
| 54423 | "#error \"Never use <xsaveintrin.h> directly; include <immintrin.h> instead.\"\n" |
| 54424 | "#endif\n" |
| 54425 | "\n" |
| 54426 | "#ifndef __XSAVEINTRIN_H\n" |
| 54427 | "#define __XSAVEINTRIN_H\n" |
| 54428 | "\n" |
| 54429 | "/* Define the default attributes for the functions in this file. */\n" |
| 54430 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"xsave\")))\n" |
| 54431 | "\n" |
| 54432 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 54433 | "_xsave(void *__p, unsigned long long __m) {\n" |
| 54434 | " __builtin_ia32_xsave(__p, __m);\n" |
| 54435 | "}\n" |
| 54436 | "\n" |
| 54437 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 54438 | "_xrstor(void *__p, unsigned long long __m) {\n" |
| 54439 | " __builtin_ia32_xrstor(__p, __m);\n" |
| 54440 | "}\n" |
| 54441 | "\n" |
| 54442 | "#ifdef __x86_64__\n" |
| 54443 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 54444 | "_xsave64(void *__p, unsigned long long __m) {\n" |
| 54445 | " __builtin_ia32_xsave64(__p, __m);\n" |
| 54446 | "}\n" |
| 54447 | "\n" |
| 54448 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 54449 | "_xrstor64(void *__p, unsigned long long __m) {\n" |
| 54450 | " __builtin_ia32_xrstor64(__p, __m);\n" |
| 54451 | "}\n" |
| 54452 | "#endif\n" |
| 54453 | "\n" |
| 54454 | "#undef __DEFAULT_FN_ATTRS\n" |
| 54455 | "\n" |
| 54456 | "#endif\n" |
| 54457 | "" } , |
| 54458 | { "/builtins/xsaveoptintrin.h" , "/*===---- xsaveoptintrin.h - XSAVEOPT intrinsic ----------------------------===\n" |
| 54459 | " *\n" |
| 54460 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 54461 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 54462 | " * in the Software without restriction, including without limitation the rights\n" |
| 54463 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 54464 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 54465 | " * furnished to do so, subject to the following conditions:\n" |
| 54466 | " *\n" |
| 54467 | " * The above copyright notice and this permission notice shall be included in\n" |
| 54468 | " * all copies or substantial portions of the Software.\n" |
| 54469 | " *\n" |
| 54470 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 54471 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 54472 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 54473 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 54474 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 54475 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 54476 | " * THE SOFTWARE.\n" |
| 54477 | " *\n" |
| 54478 | " *===-----------------------------------------------------------------------===\n" |
| 54479 | " */\n" |
| 54480 | "\n" |
| 54481 | "#ifndef __IMMINTRIN_H\n" |
| 54482 | "#error \"Never use <xsaveoptintrin.h> directly; include <immintrin.h> instead.\"\n" |
| 54483 | "#endif\n" |
| 54484 | "\n" |
| 54485 | "#ifndef __XSAVEOPTINTRIN_H\n" |
| 54486 | "#define __XSAVEOPTINTRIN_H\n" |
| 54487 | "\n" |
| 54488 | "/* Define the default attributes for the functions in this file. */\n" |
| 54489 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"xsaveopt\")))\n" |
| 54490 | "\n" |
| 54491 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 54492 | "_xsaveopt(void *__p, unsigned long long __m) {\n" |
| 54493 | " __builtin_ia32_xsaveopt(__p, __m);\n" |
| 54494 | "}\n" |
| 54495 | "\n" |
| 54496 | "#ifdef __x86_64__\n" |
| 54497 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 54498 | "_xsaveopt64(void *__p, unsigned long long __m) {\n" |
| 54499 | " __builtin_ia32_xsaveopt64(__p, __m);\n" |
| 54500 | "}\n" |
| 54501 | "#endif\n" |
| 54502 | "\n" |
| 54503 | "#undef __DEFAULT_FN_ATTRS\n" |
| 54504 | "\n" |
| 54505 | "#endif\n" |
| 54506 | "" } , |
| 54507 | { "/builtins/xsavesintrin.h" , "/*===---- xsavesintrin.h - XSAVES intrinsic --------------------------------===\n" |
| 54508 | " *\n" |
| 54509 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 54510 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 54511 | " * in the Software without restriction, including without limitation the rights\n" |
| 54512 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 54513 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 54514 | " * furnished to do so, subject to the following conditions:\n" |
| 54515 | " *\n" |
| 54516 | " * The above copyright notice and this permission notice shall be included in\n" |
| 54517 | " * all copies or substantial portions of the Software.\n" |
| 54518 | " *\n" |
| 54519 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 54520 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 54521 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 54522 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 54523 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 54524 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 54525 | " * THE SOFTWARE.\n" |
| 54526 | " *\n" |
| 54527 | " *===-----------------------------------------------------------------------===\n" |
| 54528 | " */\n" |
| 54529 | "\n" |
| 54530 | "#ifndef __IMMINTRIN_H\n" |
| 54531 | "#error \"Never use <xsavesintrin.h> directly; include <immintrin.h> instead.\"\n" |
| 54532 | "#endif\n" |
| 54533 | "\n" |
| 54534 | "#ifndef __XSAVESINTRIN_H\n" |
| 54535 | "#define __XSAVESINTRIN_H\n" |
| 54536 | "\n" |
| 54537 | "/* Define the default attributes for the functions in this file. */\n" |
| 54538 | "#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__(\"xsaves\")))\n" |
| 54539 | "\n" |
| 54540 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 54541 | "_xsaves(void *__p, unsigned long long __m) {\n" |
| 54542 | " __builtin_ia32_xsaves(__p, __m);\n" |
| 54543 | "}\n" |
| 54544 | "\n" |
| 54545 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 54546 | "_xrstors(void *__p, unsigned long long __m) {\n" |
| 54547 | " __builtin_ia32_xrstors(__p, __m);\n" |
| 54548 | "}\n" |
| 54549 | "\n" |
| 54550 | "#ifdef __x86_64__\n" |
| 54551 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 54552 | "_xrstors64(void *__p, unsigned long long __m) {\n" |
| 54553 | " __builtin_ia32_xrstors64(__p, __m);\n" |
| 54554 | "}\n" |
| 54555 | "\n" |
| 54556 | "static __inline__ void __DEFAULT_FN_ATTRS\n" |
| 54557 | "_xsaves64(void *__p, unsigned long long __m) {\n" |
| 54558 | " __builtin_ia32_xsaves64(__p, __m);\n" |
| 54559 | "}\n" |
| 54560 | "#endif\n" |
| 54561 | "\n" |
| 54562 | "#undef __DEFAULT_FN_ATTRS\n" |
| 54563 | "\n" |
| 54564 | "#endif\n" |
| 54565 | "" } , |
| 54566 | { "/builtins/xtestintrin.h" , "/*===---- xtestintrin.h - XTEST intrinsic ----------------------------------===\n" |
| 54567 | " *\n" |
| 54568 | " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" |
| 54569 | " * of this software and associated documentation files (the \"Software\"), to deal\n" |
| 54570 | " * in the Software without restriction, including without limitation the rights\n" |
| 54571 | " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" |
| 54572 | " * copies of the Software, and to permit persons to whom the Software is\n" |
| 54573 | " * furnished to do so, subject to the following conditions:\n" |
| 54574 | " *\n" |
| 54575 | " * The above copyright notice and this permission notice shall be included in\n" |
| 54576 | " * all copies or substantial portions of the Software.\n" |
| 54577 | " *\n" |
| 54578 | " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" |
| 54579 | " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" |
| 54580 | " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" |
| 54581 | " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" |
| 54582 | " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" |
| 54583 | " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" |
| 54584 | " * THE SOFTWARE.\n" |
| 54585 | " *\n" |
| 54586 | " *===-----------------------------------------------------------------------===\n" |
| 54587 | " */\n" |
| 54588 | "\n" |
| 54589 | "#ifndef __IMMINTRIN_H\n" |
| 54590 | "#error \"Never use <xtestintrin.h> directly; include <immintrin.h> instead.\"\n" |
| 54591 | "#endif\n" |
| 54592 | "\n" |
| 54593 | "#ifndef __XTESTINTRIN_H\n" |
| 54594 | "#define __XTESTINTRIN_H\n" |
| 54595 | "\n" |
| 54596 | "/* xtest returns non-zero if the instruction is executed within an RTM or active\n" |
| 54597 | " * HLE region. */\n" |
| 54598 | "/* FIXME: This can be an either or for RTM/HLE. Deal with this when HLE is\n" |
| 54599 | " * supported. */\n" |
| 54600 | "static __inline__ int\n" |
| 54601 | " __attribute__((__always_inline__, __nodebug__, __target__(\"rtm\")))\n" |
| 54602 | " _xtest(void) {\n" |
| 54603 | " return __builtin_ia32_xtest();\n" |
| 54604 | "}\n" |
| 54605 | "\n" |
| 54606 | "#endif\n" |
| 54607 | "" } , |
| 54608 | |
| 54609 | {} |
| 54610 | }; |
| 54611 | |
| 54612 | |
| 54613 | |